diff --git a/.github/workflows/muelu_clang_format.yml b/.github/workflows/muelu_clang_format.yml new file mode 100644 index 000000000000..fc4ef5e00c34 --- /dev/null +++ b/.github/workflows/muelu_clang_format.yml @@ -0,0 +1,29 @@ +name: Check MueLu clang-format + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: DoozyX/clang-format-lint-action@v0.16.2 + with: + source: './packages/muelu' + exclude: '' + extensions: 'cpp,hpp' + clangFormatVersion: 14 + inplace: true + - run: git diff HEAD > format_patch.txt + - run: if [ "$(cat format_patch.txt)" == "" ] ; then rm format_patch.txt ; fi + - uses: actions/upload-artifact@v3 + if: ${{ hashFiles('format_patch.txt') != '' }} + with: + name: MueLu format patch + path: format_patch.txt + - uses: actions/github-script@v3 + if: ${{ hashFiles('format_patch.txt') != '' }} + with: + script: | + core.setFailed('Please download and apply the formatting patch! It is located at the bottom of the summary tab for this workflow.') diff --git a/packages/muelu/.clang-format b/packages/muelu/.clang-format new file mode 100644 index 000000000000..137569c6fc4c --- /dev/null +++ b/packages/muelu/.clang-format @@ -0,0 +1,13 @@ +#Official Tool: clang-format version 14.0.0 +#Kokkos options +BasedOnStyle: google +SortIncludes: false +AlignConsecutiveAssignments: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +#MueLu-specific options +ColumnLimit: 0 +BreakConstructorInitializersBeforeComma: true +BreakConstructorInitializers: BeforeComma +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 2 diff --git a/packages/muelu/adapters/amgx/MueLu_AMGXOperator_decl.hpp b/packages/muelu/adapters/amgx/MueLu_AMGXOperator_decl.hpp index 65d13a03d4a4..b233bbc226f6 100644 --- a/packages/muelu/adapters/amgx/MueLu_AMGXOperator_decl.hpp +++ b/packages/muelu/adapters/amgx/MueLu_AMGXOperator_decl.hpp @@ -46,7 +46,7 @@ 
#ifndef MUELU_AMGXOPERATOR_DECL_HPP #define MUELU_AMGXOPERATOR_DECL_HPP -#if defined (HAVE_MUELU_AMGX) +#if defined(HAVE_MUELU_AMGX) #include #include @@ -67,468 +67,461 @@ namespace MueLu { - - /*! @class AMGXOperator - @ingroup MueLuAdapters - @brief Adapter for AmgX library from Nvidia. - - This templated version of the class throws errors in all methods as AmgX is not implemented for datatypes where scalar!=double/float and ordinal !=int - */ - template - class AMGXOperator : public TpetraOperator, public BaseClass { - private: - typedef Scalar SC; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef Node NO; - - typedef Tpetra::Map Map; - typedef Tpetra::MultiVector MultiVector; - - public: - - //! @name Constructor/Destructor - //@{ - - //! Constructor - AMGXOperator(const Teuchos::RCP > &InA, Teuchos::ParameterList ¶mListIn) { } - - //! Destructor. - virtual ~AMGXOperator() {} - - //@} - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP getDomainMap() const{ - throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); - } - - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP getRangeMap() const{ - throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); - } - - //! Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X. - /*! - \param[in] X - Tpetra::MultiVector of dimension NumVectors that contains the solution to the linear system. - \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing the RHS of the linear system. 
- */ - void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), Scalar beta = Teuchos::ScalarTraits::zero()) const { - throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); - } - - //! Indicates whether this operator supports applying the adjoint operator - bool hasTransposeApply() const{ - throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); - } - - RCP > GetHierarchy() const { - throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n"); - } - - private: - }; - - /*! @class AMGXOperator - @ingroup MueLuAdapters - @brief Adapter for AmgX library from Nvidia. - - Creates an AmgX Solver object with a Tpetra Matrix. Partial specialization of the template for data types supported by AmgX. +/*! @class AMGXOperator + @ingroup MueLuAdapters + @brief Adapter for AmgX library from Nvidia. + + This templated version of the class throws errors in all methods as AmgX is not implemented for datatypes where scalar!=double/float and ordinal !=int +*/ +template +class AMGXOperator : public TpetraOperator, public BaseClass { + private: + typedef Scalar SC; + typedef LocalOrdinal LO; + typedef GlobalOrdinal GO; + typedef Node NO; + + typedef Tpetra::Map Map; + typedef Tpetra::MultiVector MultiVector; + + public: + //! @name Constructor/Destructor + //@{ + + //! Constructor + AMGXOperator(const Teuchos::RCP >& InA, Teuchos::ParameterList& paramListIn) {} + + //! Destructor. + virtual ~AMGXOperator() {} + + //@} + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP getDomainMap() const { + throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); + } + + //! Returns the Tpetra::Map object associated with the range of this operator. 
+ Teuchos::RCP getRangeMap() const { + throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); + } + + //! Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X. + /*! + \param[in] X - Tpetra::MultiVector of dimension NumVectors that contains the solution to the linear system. + \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing the RHS of the linear system. */ - template - class AMGXOperator : public TpetraOperator { - private: - typedef double SC; - typedef int LO; - typedef int GO; - typedef Node NO; - - typedef Tpetra::Map Map; - typedef Tpetra::MultiVector MultiVector; - - - void printMaps(Teuchos::RCP >& comm, const std::vector >& vec, const std::vector& perm, - const int* nbrs, const Map& map, const std::string& label) { - for (int p = 0; p < comm->getSize(); p++) { - if (comm->getRank() == p) { - std::cout << "========\n" << label << ", lid (gid), PID " << p << "\n========" << std::endl; - - for (size_t i = 0; i < vec.size(); ++i) { - std::cout << " neighbor " << nbrs[i] << " :"; - for (size_t j = 0; j < vec[i].size(); ++j) - std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")"; - std::cout << std::endl; - } + void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), Scalar beta = Teuchos::ScalarTraits::zero()) const { + throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); + } + + //! Indicates whether this operator supports applying the adjoint operator + bool hasTransposeApply() const { + throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n"); + } + + RCP > GetHierarchy() const { + throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n"); + } + + private: +}; + +/*! 
@class AMGXOperator + @ingroup MueLuAdapters + @brief Adapter for AmgX library from Nvidia. + + Creates an AmgX Solver object with a Tpetra Matrix. Partial specialization of the template for data types supported by AmgX. +*/ +template +class AMGXOperator : public TpetraOperator { + private: + typedef double SC; + typedef int LO; + typedef int GO; + typedef Node NO; + + typedef Tpetra::Map Map; + typedef Tpetra::MultiVector MultiVector; + + void printMaps(Teuchos::RCP >& comm, const std::vector >& vec, const std::vector& perm, + const int* nbrs, const Map& map, const std::string& label) { + for (int p = 0; p < comm->getSize(); p++) { + if (comm->getRank() == p) { + std::cout << "========\n" + << label << ", lid (gid), PID " << p << "\n========" << std::endl; + + for (size_t i = 0; i < vec.size(); ++i) { + std::cout << " neighbor " << nbrs[i] << " :"; + for (size_t j = 0; j < vec[i].size(); ++j) + std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")"; std::cout << std::endl; - } else { - sleep(1); } - comm->barrier(); + std::cout << std::endl; + } else { + sleep(1); } + comm->barrier(); } - - public: - - //! 
@name Constructor/Destructor - //@{ - AMGXOperator(const Teuchos::RCP > &inA, Teuchos::ParameterList ¶mListIn) { - RCP > comm = inA->getRowMap()->getComm(); - int numProcs = comm->getSize(); - int myRank = comm->getRank(); - - - RCP amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize"); - amgxTimer->start(); - // Initialize - //AMGX_SAFE_CALL(AMGX_initialize()); - //AMGX_SAFE_CALL(AMGX_initialize_plugins()); - - - /*system*/ - //AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback)); - AMGX_SAFE_CALL(AMGX_install_signal_handler()); - Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true); - if (configs.isParameter("json file")) { - AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char *) &configs.get("json file")[0])); - } else { - std::ostringstream oss; - oss << ""; - ParameterList::ConstIterator itr; - for (itr = configs.begin(); itr != configs.end(); ++itr) { - const std::string& name = configs.name(itr); - const ParameterEntry& entry = configs.entry(itr); - oss << name << "=" << filterValueToString(entry) << ", "; - } - oss << "\0"; - std::string configString = oss.str(); - if (configString == "") { - //print msg that using defaults - //GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n"; - } - AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str())); + } + + public: + //! 
@name Constructor/Destructor + //@{ + AMGXOperator(const Teuchos::RCP >& inA, Teuchos::ParameterList& paramListIn) { + RCP > comm = inA->getRowMap()->getComm(); + int numProcs = comm->getSize(); + int myRank = comm->getRank(); + + RCP amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize"); + amgxTimer->start(); + // Initialize + // AMGX_SAFE_CALL(AMGX_initialize()); + // AMGX_SAFE_CALL(AMGX_initialize_plugins()); + + /*system*/ + // AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback)); + AMGX_SAFE_CALL(AMGX_install_signal_handler()); + Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true); + if (configs.isParameter("json file")) { + AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char*)&configs.get("json file")[0])); + } else { + std::ostringstream oss; + oss << ""; + ParameterList::ConstIterator itr; + for (itr = configs.begin(); itr != configs.end(); ++itr) { + const std::string& name = configs.name(itr); + const ParameterEntry& entry = configs.entry(itr); + oss << name << "=" << filterValueToString(entry) << ", "; + } + oss << "\0"; + std::string configString = oss.str(); + if (configString == "") { + // print msg that using defaults + // GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. 
\n"; } + AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str())); + } - // TODO: we probably need to add "exception_handling=1" to the parameter list - // to switch on internal error handling (with no need for AMGX_SAFE_CALL) + // TODO: we probably need to add "exception_handling=1" to the parameter list + // to switch on internal error handling (with no need for AMGX_SAFE_CALL) - //AMGX_SAFE_CALL(AMGX_config_add_parameters(&Config_, "exception_handling=1")) + // AMGX_SAFE_CALL(AMGX_config_add_parameters(&Config_, "exception_handling=1")) #define NEW_COMM #ifdef NEW_COMM - // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring - // FIXME: fix for serial comm - RCP > tmpic = Teuchos::rcp_dynamic_cast >(comm->duplicate()); - TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm"); + // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring + // FIXME: fix for serial comm + RCP > tmpic = Teuchos::rcp_dynamic_cast >(comm->duplicate()); + TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm"); - RCP > rawMpiComm = tmpic->getRawMpiComm(); - MPI_Comm mpiComm = *rawMpiComm; + RCP > rawMpiComm = tmpic->getRawMpiComm(); + MPI_Comm mpiComm = *rawMpiComm; #endif - // Construct AMGX resources - if (numProcs == 1) { - AMGX_resources_create_simple(&Resources_, Config_); + // Construct AMGX resources + if (numProcs == 1) { + AMGX_resources_create_simple(&Resources_, Config_); - } else { - int numGPUDevices; - cudaGetDeviceCount(&numGPUDevices); - int device[] = {(comm->getRank() % numGPUDevices)}; + } else { + int numGPUDevices; + cudaGetDeviceCount(&numGPUDevices); + int device[] = {(comm->getRank() % numGPUDevices)}; - AMGX_config_add_parameters(&Config_, "communicator=MPI"); + AMGX_config_add_parameters(&Config_, "communicator=MPI"); #ifdef NEW_COMM - 
AMGX_resources_create(&Resources_, Config_, &mpiComm, 1/* number of GPU devices utilized by this rank */, device); + AMGX_resources_create(&Resources_, Config_, &mpiComm, 1 /* number of GPU devices utilized by this rank */, device); #else - AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1/* number of GPU devices utilized by this rank */, device); + AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1 /* number of GPU devices utilized by this rank */, device); #endif - } + } - AMGX_Mode mode = AMGX_mode_dDDI; - AMGX_solver_create(&Solver_, Resources_, mode, Config_); - AMGX_matrix_create(&A_, Resources_, mode); - AMGX_vector_create(&X_, Resources_, mode); - AMGX_vector_create(&Y_, Resources_, mode); + AMGX_Mode mode = AMGX_mode_dDDI; + AMGX_solver_create(&Solver_, Resources_, mode, Config_); + AMGX_matrix_create(&A_, Resources_, mode); + AMGX_vector_create(&X_, Resources_, mode); + AMGX_vector_create(&Y_, Resources_, mode); - amgxTimer->stop(); - amgxTimer->incrementNumCalls(); + amgxTimer->stop(); + amgxTimer->incrementNumCalls(); - std::vector amgx2muelu; + std::vector amgx2muelu; - // Construct AMGX communication pattern - if (numProcs > 1) { - RCP > importer = inA->getCrsGraph()->getImporter(); + // Construct AMGX communication pattern + if (numProcs > 1) { + RCP > importer = inA->getCrsGraph()->getImporter(); - TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object."); + TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object."); - Tpetra::Distributor distributor = importer->getDistributor(); + Tpetra::Distributor distributor = importer->getDistributor(); - Array sendRanks = distributor.getProcsTo(); - Array recvRanks = distributor.getProcsFrom(); + Array sendRanks = distributor.getProcsTo(); + Array recvRanks = distributor.getProcsFrom(); - std::sort(sendRanks.begin(), sendRanks.end()); - std::sort(recvRanks.begin(), 
recvRanks.end()); + std::sort(sendRanks.begin(), sendRanks.end()); + std::sort(recvRanks.begin(), recvRanks.end()); - bool match = true; - if (sendRanks.size() != recvRanks.size()) { - match = false; - } else { - for (int i = 0; i < sendRanks.size(); i++) { - if (recvRanks[i] != sendRanks[i]) - match = false; - break; - } + bool match = true; + if (sendRanks.size() != recvRanks.size()) { + match = false; + } else { + for (int i = 0; i < sendRanks.size(); i++) { + if (recvRanks[i] != sendRanks[i]) + match = false; + break; } - TEUCHOS_TEST_FOR_EXCEPTION(!match, MueLu::Exceptions::RuntimeError, "AMGX requires that the processors that we send to and receive from are the same. " - "This is not the case: we send to {" << sendRanks << "} and receive from {" << recvRanks << "}"); - - int num_neighbors = sendRanks.size(); // does not include the calling process - const int* neighbors = &sendRanks[0]; - - // Later on, we'll have to organize the send and recv data by PIDs, - // i.e, a vector V of vectors, where V[i] is PID i's vector of data. - // Hence we need to be able to quickly look up an array index - // associated with each PID. 
- Tpetra::Details::HashTable hashTable(3*num_neighbors); - for (int i = 0; i < num_neighbors; i++) - hashTable.add(neighbors[i], i); - - // Get some information out - ArrayView exportLIDs = importer->getExportLIDs(); - ArrayView exportPIDs = importer->getExportPIDs(); - Array importPIDs; - Tpetra::Import_Util::getPids(*importer, importPIDs, true/* make local -1 */); - - // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation - RCP rowMap = inA->getRowMap(); - RCP colMap = inA->getColMap(); - - int N = rowMap->getLocalNumElements(), Nc = colMap->getLocalNumElements(); - muelu2amgx_.resize(Nc, -1); - - int numUniqExports = 0; - for (int i = 0; i < exportLIDs.size(); i++) - if (muelu2amgx_[exportLIDs[i]] == -1) { - numUniqExports++; - muelu2amgx_[exportLIDs[i]] = -2; - } - - int localOffset = 0, exportOffset = N - numUniqExports; - // Go through exported LIDs and put them at the end of LIDs - for (int i = 0; i < exportLIDs.size(); i++) - if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique - muelu2amgx_[exportLIDs[i]] = exportOffset++; - // Go through all non-export LIDs, and put them at the beginning of LIDs - for (int i = 0; i < N; i++) - if (muelu2amgx_[i] == -1) - muelu2amgx_[i] = localOffset++; - // Go through the tail (imported LIDs), and order those by neighbors - int importOffset = N; - for (int k = 0; k < num_neighbors; k++) - for (int i = 0; i < importPIDs.size(); i++) - if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k) - muelu2amgx_[i] = importOffset++; - - amgx2muelu.resize(muelu2amgx_.size()); - for (int i = 0; i < (int)muelu2amgx_.size(); i++) - amgx2muelu[muelu2amgx_[i]] = i; - - // Construct send arrays - std::vector > sendDatas (num_neighbors); - std::vector send_sizes(num_neighbors, 0); - for (int i = 0; i < exportPIDs.size(); i++) { - int index = hashTable.get(exportPIDs[i]); - sendDatas [index].push_back(muelu2amgx_[exportLIDs[i]]); - send_sizes[index]++; + } + TEUCHOS_TEST_FOR_EXCEPTION(!match, 
MueLu::Exceptions::RuntimeError, + "AMGX requires that the processors that we send to and receive from are the same. " + "This is not the case: we send to {" + << sendRanks << "} and receive from {" << recvRanks << "}"); + + int num_neighbors = sendRanks.size(); // does not include the calling process + const int* neighbors = &sendRanks[0]; + + // Later on, we'll have to organize the send and recv data by PIDs, + // i.e, a vector V of vectors, where V[i] is PID i's vector of data. + // Hence we need to be able to quickly look up an array index + // associated with each PID. + Tpetra::Details::HashTable hashTable(3 * num_neighbors); + for (int i = 0; i < num_neighbors; i++) + hashTable.add(neighbors[i], i); + + // Get some information out + ArrayView exportLIDs = importer->getExportLIDs(); + ArrayView exportPIDs = importer->getExportPIDs(); + Array importPIDs; + Tpetra::Import_Util::getPids(*importer, importPIDs, true /* make local -1 */); + + // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation + RCP rowMap = inA->getRowMap(); + RCP colMap = inA->getColMap(); + + int N = rowMap->getLocalNumElements(), Nc = colMap->getLocalNumElements(); + muelu2amgx_.resize(Nc, -1); + + int numUniqExports = 0; + for (int i = 0; i < exportLIDs.size(); i++) + if (muelu2amgx_[exportLIDs[i]] == -1) { + numUniqExports++; + muelu2amgx_[exportLIDs[i]] = -2; } - // FIXME: sendDatas must be sorted (based on GIDs) - std::vector send_maps(num_neighbors); - for (int i = 0; i < num_neighbors; i++) - send_maps[i] = &(sendDatas[i][0]); - - // Debugging - // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector"); - - // Construct recv arrays - std::vector > recvDatas (num_neighbors); - std::vector recv_sizes(num_neighbors, 0); + int localOffset = 0, exportOffset = N - numUniqExports; + // Go through exported LIDs and put them at the end of LIDs + for (int i = 0; i < exportLIDs.size(); i++) + if (muelu2amgx_[exportLIDs[i]] < 0) // 
exportLIDs are not unique + muelu2amgx_[exportLIDs[i]] = exportOffset++; + // Go through all non-export LIDs, and put them at the beginning of LIDs + for (int i = 0; i < N; i++) + if (muelu2amgx_[i] == -1) + muelu2amgx_[i] = localOffset++; + // Go through the tail (imported LIDs), and order those by neighbors + int importOffset = N; + for (int k = 0; k < num_neighbors; k++) for (int i = 0; i < importPIDs.size(); i++) - if (importPIDs[i] != -1) { - int index = hashTable.get(importPIDs[i]); - recvDatas [index].push_back(muelu2amgx_[i]); - recv_sizes[index]++; - } - // FIXME: recvDatas must be sorted (based on GIDs) - - std::vector recv_maps(num_neighbors); - for (int i = 0; i < num_neighbors; i++) - recv_maps[i] = &(recvDatas[i][0]); - - // Debugging - // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector"); - - AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0])); - - AMGX_vector_bind(X_, A_); - AMGX_vector_bind(Y_, A_); + if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k) + muelu2amgx_[i] = importOffset++; + + amgx2muelu.resize(muelu2amgx_.size()); + for (int i = 0; i < (int)muelu2amgx_.size(); i++) + amgx2muelu[muelu2amgx_[i]] = i; + + // Construct send arrays + std::vector > sendDatas(num_neighbors); + std::vector send_sizes(num_neighbors, 0); + for (int i = 0; i < exportPIDs.size(); i++) { + int index = hashTable.get(exportPIDs[i]); + sendDatas[index].push_back(muelu2amgx_[exportLIDs[i]]); + send_sizes[index]++; } - - RCP matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix"); - matrixTransformTimer->start(); - - ArrayRCP ia_s; - ArrayRCP ja; - ArrayRCP a; - inA->getAllValues(ia_s, ja, a); - - ArrayRCP ia(ia_s.size()); - for (int i = 0; i < ia.size(); i++) - ia[i] = Teuchos::as(ia_s[i]); - - N_ = inA->getLocalNumRows(); - int nnz = inA->getLocalNumEntries(); - - 
matrixTransformTimer->stop(); - matrixTransformTimer->incrementNumCalls(); - - - // Upload matrix - // TODO Do we need to pin memory here through AMGX_pin_memory? - RCP matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU"); - matrixTimer->start(); - if (numProcs == 1) { - AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL); - - } else { - // Transform the matrix - std::vector ia_new(ia.size()); - std::vector ja_new(ja.size()); - std::vector a_new (a.size()); - - ia_new[0] = 0; - for (int i = 0; i < N_; i++) { - int oldRow = amgx2muelu[i]; - - ia_new[i+1] = ia_new[i] + (ia[oldRow+1] - ia[oldRow]); - - for (int j = ia[oldRow]; j < ia[oldRow+1]; j++) { - int offset = j - ia[oldRow]; - ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]]; - a_new [ia_new[i] + offset] = a[j]; - } - // Do bubble sort on two arrays - // NOTE: There are multiple possible optimizations here (even of bubble sort) - bool swapped; - do { - swapped = false; - - for (int j = ia_new[i]; j < ia_new[i+1]-1; j++) - if (ja_new[j] > ja_new[j+1]) { - std::swap(ja_new[j], ja_new[j+1]); - std::swap(a_new [j], a_new [j+1]); - swapped = true; - } - } while (swapped == true); + // FIXME: sendDatas must be sorted (based on GIDs) + + std::vector send_maps(num_neighbors); + for (int i = 0; i < num_neighbors; i++) + send_maps[i] = &(sendDatas[i][0]); + + // Debugging + // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector"); + + // Construct recv arrays + std::vector > recvDatas(num_neighbors); + std::vector recv_sizes(num_neighbors, 0); + for (int i = 0; i < importPIDs.size(); i++) + if (importPIDs[i] != -1) { + int index = hashTable.get(importPIDs[i]); + recvDatas[index].push_back(muelu2amgx_[i]); + recv_sizes[index]++; } + // FIXME: recvDatas must be sorted (based on GIDs) - AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL); - } - matrixTimer->stop(); - 
matrixTimer->incrementNumCalls(); - - domainMap_ = inA->getDomainMap(); - rangeMap_ = inA->getRangeMap(); - - RCP realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: setup (total)"); - realSetupTimer->start(); - AMGX_solver_setup(Solver_, A_); - realSetupTimer->stop(); - realSetupTimer->incrementNumCalls(); - - vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU"); - vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU"); - solverTimer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: Solve (total)"); - } - - //! Destructor. - virtual ~AMGXOperator() - { - // Comment this out if you need rebuild to work. This causes AMGX_solver_destroy memory issues. - AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_)); - AMGX_SAFE_CALL(AMGX_vector_destroy(X_)); - AMGX_SAFE_CALL(AMGX_vector_destroy(Y_)); - AMGX_SAFE_CALL(AMGX_matrix_destroy(A_)); - AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_)); - AMGX_SAFE_CALL(AMGX_config_destroy(Config_)); - } - - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP getDomainMap() const; + std::vector recv_maps(num_neighbors); + for (int i = 0; i < num_neighbors; i++) + recv_maps[i] = &(recvDatas[i][0]); - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP getRangeMap() const; + // Debugging + // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector"); - //! Returns in X the solution to the linear system AX=Y. - /*! 
- \param[out] X - Tpetra::MultiVector of dimension NumVectors containing the RHS of the linear system - \param[in] Y - Tpetra::MultiVector of dimension NumVectors containing the solution to the linear system - */ - void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, - SC alpha = Teuchos::ScalarTraits::one(), SC beta = Teuchos::ScalarTraits::zero()) const; + AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0])); - //! Indicates whether this operator supports applying the adjoint operator. - bool hasTransposeApply() const; - - RCP > GetHierarchy() const { - throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n"); + AMGX_vector_bind(X_, A_); + AMGX_vector_bind(Y_, A_); } - std::string filterValueToString(const Teuchos::ParameterEntry& entry ) { - return ( entry.isList() ? std::string("...") : toString(entry.getAny()) ); - } + RCP matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix"); + matrixTransformTimer->start(); - int sizeA() { - int sizeX, sizeY, n; - AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY); - return n; - } + ArrayRCP ia_s; + ArrayRCP ja; + ArrayRCP a; + inA->getAllValues(ia_s, ja, a); - int iters() { - int it; - AMGX_solver_get_iterations_number(Solver_, &it); - return it; - } + ArrayRCP ia(ia_s.size()); + for (int i = 0; i < ia.size(); i++) + ia[i] = Teuchos::as(ia_s[i]); - AMGX_SOLVE_STATUS getStatus() { - AMGX_SOLVE_STATUS status; - AMGX_solver_get_status(Solver_, &status); - return status; - } + N_ = inA->getLocalNumRows(); + int nnz = inA->getLocalNumEntries(); + matrixTransformTimer->stop(); + matrixTransformTimer->incrementNumCalls(); - private: - AMGX_solver_handle Solver_; - AMGX_resources_handle Resources_; - AMGX_config_handle Config_; - AMGX_matrix_handle A_; - AMGX_vector_handle X_; - AMGX_vector_handle Y_; - int N_; + // Upload matrix + // TODO 
Do we need to pin memory here through AMGX_pin_memory? + RCP matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU"); + matrixTimer->start(); + if (numProcs == 1) { + AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL); - RCP domainMap_; - RCP rangeMap_; + } else { + // Transform the matrix + std::vector ia_new(ia.size()); + std::vector ja_new(ja.size()); + std::vector a_new(a.size()); - std::vector muelu2amgx_; + ia_new[0] = 0; + for (int i = 0; i < N_; i++) { + int oldRow = amgx2muelu[i]; - RCP vectorTimer1_; - RCP vectorTimer2_; - RCP solverTimer_; - }; + ia_new[i + 1] = ia_new[i] + (ia[oldRow + 1] - ia[oldRow]); -} // namespace + for (int j = ia[oldRow]; j < ia[oldRow + 1]; j++) { + int offset = j - ia[oldRow]; + ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]]; + a_new[ia_new[i] + offset] = a[j]; + } + // Do bubble sort on two arrays + // NOTE: There are multiple possible optimizations here (even of bubble sort) + bool swapped; + do { + swapped = false; + + for (int j = ia_new[i]; j < ia_new[i + 1] - 1; j++) + if (ja_new[j] > ja_new[j + 1]) { + std::swap(ja_new[j], ja_new[j + 1]); + std::swap(a_new[j], a_new[j + 1]); + swapped = true; + } + } while (swapped == true); + } -#endif //HAVE_MUELU_AMGX -#endif // MUELU_AMGXOPERATOR_DECL_HPP + AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL); + } + matrixTimer->stop(); + matrixTimer->incrementNumCalls(); + + domainMap_ = inA->getDomainMap(); + rangeMap_ = inA->getRangeMap(); + + RCP realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: setup (total)"); + realSetupTimer->start(); + AMGX_solver_setup(Solver_, A_); + realSetupTimer->stop(); + realSetupTimer->incrementNumCalls(); + + vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU"); + vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU"); + solverTimer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: 
AMGX: Solve (total)"); + } + + //! Destructor. + virtual ~AMGXOperator() { + // Comment this out if you need rebuild to work. This causes AMGX_solver_destroy memory issues. + AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_)); + AMGX_SAFE_CALL(AMGX_vector_destroy(X_)); + AMGX_SAFE_CALL(AMGX_vector_destroy(Y_)); + AMGX_SAFE_CALL(AMGX_matrix_destroy(A_)); + AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_)); + AMGX_SAFE_CALL(AMGX_config_destroy(Config_)); + } + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP getDomainMap() const; + + //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP getRangeMap() const; + + //! Returns in X the solution to the linear system AX=Y. + /*! + \param[out] X - Tpetra::MultiVector of dimension NumVectors containing the RHS of the linear system + \param[in] Y - Tpetra::MultiVector of dimension NumVectors containing the solution to the linear system + */ + void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, + SC alpha = Teuchos::ScalarTraits::one(), SC beta = Teuchos::ScalarTraits::zero()) const; + + //! Indicates whether this operator supports applying the adjoint operator. + bool hasTransposeApply() const; + + RCP > GetHierarchy() const { + throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n"); + } + + std::string filterValueToString(const Teuchos::ParameterEntry& entry) { + return (entry.isList() ? 
std::string("...") : toString(entry.getAny())); + } + + int sizeA() { + int sizeX, sizeY, n; + AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY); + return n; + } + + int iters() { + int it; + AMGX_solver_get_iterations_number(Solver_, &it); + return it; + } + + AMGX_SOLVE_STATUS getStatus() { + AMGX_SOLVE_STATUS status; + AMGX_solver_get_status(Solver_, &status); + return status; + } + + private: + AMGX_solver_handle Solver_; + AMGX_resources_handle Resources_; + AMGX_config_handle Config_; + AMGX_matrix_handle A_; + AMGX_vector_handle X_; + AMGX_vector_handle Y_; + int N_; + + RCP domainMap_; + RCP rangeMap_; + + std::vector muelu2amgx_; + + RCP vectorTimer1_; + RCP vectorTimer2_; + RCP solverTimer_; +}; + +} // namespace MueLu + +#endif // HAVE_MUELU_AMGX +#endif // MUELU_AMGXOPERATOR_DECL_HPP diff --git a/packages/muelu/adapters/amgx/MueLu_AMGXOperator_def.hpp b/packages/muelu/adapters/amgx/MueLu_AMGXOperator_def.hpp index 767ab6a540b9..afaee1ef36a9 100644 --- a/packages/muelu/adapters/amgx/MueLu_AMGXOperator_def.hpp +++ b/packages/muelu/adapters/amgx/MueLu_AMGXOperator_def.hpp @@ -47,100 +47,98 @@ #ifndef MUELU_AMGXOPERATOR_DEF_HPP #define MUELU_AMGXOPERATOR_DEF_HPP - -#if defined (HAVE_MUELU_AMGX) +#if defined(HAVE_MUELU_AMGX) #include "MueLu_AMGXOperator_decl.hpp" namespace MueLu { - template - Teuchos::RCP > - AMGXOperator::getDomainMap() const { - return domainMap_; - } +template +Teuchos::RCP > +AMGXOperator::getDomainMap() const { + return domainMap_; +} - template - Teuchos::RCP > AMGXOperator::getRangeMap() const { - return rangeMap_; - } +template +Teuchos::RCP > AMGXOperator::getRangeMap() const { + return rangeMap_; +} - template - void AMGXOperator::apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, double alpha, double beta) const { +template +void AMGXOperator::apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, double alpha, double beta) const { + RCP > comm = 
Y.getMap()->getComm(); - RCP > comm = Y.getMap()->getComm(); - - ArrayRCP mueluXdata, amgxXdata; - ArrayRCP mueluYdata, amgxYdata; + ArrayRCP mueluXdata, amgxXdata; + ArrayRCP mueluYdata, amgxYdata; - try { - for (int i = 0; i < (int)Y.getNumVectors(); i++) { - { - vectorTimer1_->start(); + try { + for (int i = 0; i < (int)Y.getNumVectors(); i++) { + { + vectorTimer1_->start(); - mueluXdata = X.getData(i); - mueluYdata = Y.getDataNonConst(i); + mueluXdata = X.getData(i); + mueluYdata = Y.getDataNonConst(i); - if (comm->getSize() == 1) { - amgxXdata = mueluXdata; - amgxYdata = mueluYdata; + if (comm->getSize() == 1) { + amgxXdata = mueluXdata; + amgxYdata = mueluYdata; - } else { - int n = mueluXdata.size(); + } else { + int n = mueluXdata.size(); - amgxXdata.resize(n); - amgxYdata.resize(n); + amgxXdata.resize(n); + amgxYdata.resize(n); - ArrayRCP amgxXdata_nonConst = Teuchos::arcp_const_cast(amgxXdata); - for (int j = 0; j < n; j++) { - amgxXdata_nonConst[muelu2amgx_[j]] = mueluXdata[j]; - amgxYdata [muelu2amgx_[j]] = mueluYdata[j]; - } + ArrayRCP amgxXdata_nonConst = Teuchos::arcp_const_cast(amgxXdata); + for (int j = 0; j < n; j++) { + amgxXdata_nonConst[muelu2amgx_[j]] = mueluXdata[j]; + amgxYdata[muelu2amgx_[j]] = mueluYdata[j]; } + } - AMGX_vector_upload(X_, N_, 1, &amgxXdata[0]); - AMGX_vector_upload(Y_, N_, 1, &amgxYdata[0]); + AMGX_vector_upload(X_, N_, 1, &amgxXdata[0]); + AMGX_vector_upload(Y_, N_, 1, &amgxYdata[0]); - vectorTimer1_->stop(); - vectorTimer1_->incrementNumCalls(); - } - - // Solve the system and time. - solverTimer_->start(); - AMGX_solver_solve(Solver_, X_, Y_); - solverTimer_->stop(); - solverTimer_->incrementNumCalls(); + vectorTimer1_->stop(); + vectorTimer1_->incrementNumCalls(); + } - { - vectorTimer2_->start(); + // Solve the system and time. 
+ solverTimer_->start(); + AMGX_solver_solve(Solver_, X_, Y_); + solverTimer_->stop(); + solverTimer_->incrementNumCalls(); - AMGX_vector_download(Y_, &amgxYdata[0]); + { + vectorTimer2_->start(); - if (comm->getSize() > 1) { - int n = mueluYdata.size(); + AMGX_vector_download(Y_, &amgxYdata[0]); - for (int j = 0; j < n; j++) - mueluYdata[j] = amgxYdata[muelu2amgx_[j]]; - } + if (comm->getSize() > 1) { + int n = mueluYdata.size(); - vectorTimer2_->stop(); - vectorTimer2_->incrementNumCalls(); + for (int j = 0; j < n; j++) + mueluYdata[j] = amgxYdata[muelu2amgx_[j]]; } - } - } catch (std::exception& e) { - std::string errMsg = std::string("Caught an exception in MueLu::AMGXOperator::Apply():\n") + e.what() + "\n"; - throw Exceptions::RuntimeError(errMsg); + vectorTimer2_->stop(); + vectorTimer2_->incrementNumCalls(); + } } - } - template - bool AMGXOperator::hasTransposeApply() const { - return false; + } catch (std::exception& e) { + std::string errMsg = std::string("Caught an exception in MueLu::AMGXOperator::Apply():\n") + e.what() + "\n"; + throw Exceptions::RuntimeError(errMsg); } +} + +template +bool AMGXOperator::hasTransposeApply() const { + return false; +} -} // namespace -#endif //if defined(HAVE_MUELU_AMGX) +} // namespace MueLu +#endif // if defined(HAVE_MUELU_AMGX) -#endif //ifdef MUELU_AMGXOPERATOR_DEF_HPP +#endif // ifdef MUELU_AMGXOPERATOR_DEF_HPP diff --git a/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.cpp b/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.cpp index e0fe383ca373..2170d252c188 100644 --- a/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.cpp +++ b/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.cpp @@ -48,25 +48,20 @@ namespace MueLu { - void MueLu_AMGX_initialize() - { - AMGX_SAFE_CALL(AMGX_initialize()); - } +void MueLu_AMGX_initialize() { + AMGX_SAFE_CALL(AMGX_initialize()); +} - void MueLu_AMGX_initialize_plugins() - { - AMGX_SAFE_CALL(AMGX_initialize_plugins()); - } +void MueLu_AMGX_initialize_plugins() { + 
AMGX_SAFE_CALL(AMGX_initialize_plugins()); +} - void MueLu_AMGX_finalize() - { - AMGX_SAFE_CALL(AMGX_finalize()); - } - - void MueLu_AMGX_finalize_plugins() - { - AMGX_print_summary(); - AMGX_SAFE_CALL(AMGX_finalize_plugins()); - } -} // namespace +void MueLu_AMGX_finalize() { + AMGX_SAFE_CALL(AMGX_finalize()); +} +void MueLu_AMGX_finalize_plugins() { + AMGX_print_summary(); + AMGX_SAFE_CALL(AMGX_finalize_plugins()); +} +} // namespace MueLu diff --git a/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.hpp b/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.hpp index 6b645310dd7d..fe4c1c9791c9 100644 --- a/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.hpp +++ b/packages/muelu/adapters/amgx/MueLu_AMGX_Setup.hpp @@ -51,12 +51,12 @@ namespace MueLu { - void MueLu_AMGX_initialize(); - void MueLu_AMGX_initialize_plugins(); +void MueLu_AMGX_initialize(); +void MueLu_AMGX_initialize_plugins(); - void MueLu_AMGX_finalize(); - void MueLu_AMGX_finalize_plugins(); -} +void MueLu_AMGX_finalize(); +void MueLu_AMGX_finalize_plugins(); +} // namespace MueLu -#endif //HAVE_MUELU_AMGX -#endif //ifndef MUELU_AMGX_SETUP_DEF_HPP +#endif // HAVE_MUELU_AMGX +#endif // ifndef MUELU_AMGX_SETUP_DEF_HPP diff --git a/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.cpp b/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.cpp index 515f950ced6e..a0e2efb6d066 100644 --- a/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.cpp +++ b/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.cpp @@ -6,7 +6,7 @@ #include "Xpetra_CrsMatrixWrap.hpp" #include "Xpetra_EpetraCrsMatrix.hpp" -#include "MueLu_config.hpp" // for HAVE_MUELU_DEBUG +#include "MueLu_config.hpp" // for HAVE_MUELU_DEBUG #include "MueLu_RefMaxwell.hpp" #include "MueLu_Exceptions.hpp" @@ -18,30 +18,30 @@ namespace MueLu { int AztecEpetraOperator::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { try { - // There is no rcpFromRef(const T&), so we need to do const_cast - const 
Xpetra::EpetraMultiVectorT eX(Teuchos::rcpFromRef(const_cast(X))); - Xpetra::EpetraMultiVectorT eY(Teuchos::rcpFromRef(Y)); - // Generally, we assume two different vectors, but AztecOO uses a single vector - if (X.Values() == Y.Values()) { - // X and Y point to the same memory, use an additional vector - Teuchos::RCP > tmpY = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(eY.getMap(), eY.getNumVectors())); - tmpY->putScalar(0.0); - xOp_->apply(eX, *tmpY); - // deep copy solution from MueLu - eY.update(1.0, *tmpY, 0.0); - } else { - // X and Y point to different memory, pass the vectors through - eY.putScalar(0.0); - xOp_->apply(eX, eY); - } - - } catch (std::exception& e) { - //TODO: error msg directly on std::cerr? - std::cerr << "Caught an exception in MueLu::AztecEpetraOperator::ApplyInverse():" << std::endl - << e.what() << std::endl; - return -1; - } - return 0; + // There is no rcpFromRef(const T&), so we need to do const_cast + const Xpetra::EpetraMultiVectorT eX(Teuchos::rcpFromRef(const_cast(X))); + Xpetra::EpetraMultiVectorT eY(Teuchos::rcpFromRef(Y)); + // Generally, we assume two different vectors, but AztecOO uses a single vector + if (X.Values() == Y.Values()) { + // X and Y point to the same memory, use an additional vector + Teuchos::RCP> tmpY = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(eY.getMap(), eY.getNumVectors())); + tmpY->putScalar(0.0); + xOp_->apply(eX, *tmpY); + // deep copy solution from MueLu + eY.update(1.0, *tmpY, 0.0); + } else { + // X and Y point to different memory, pass the vectors through + eY.putScalar(0.0); + xOp_->apply(eX, eY); + } + + } catch (std::exception& e) { + // TODO: error msg directly on std::cerr? 
+ std::cerr << "Caught an exception in MueLu::AztecEpetraOperator::ApplyInverse():" << std::endl + << e.what() << std::endl; + return -1; + } + return 0; } const Epetra_Comm& AztecEpetraOperator::Comm() const { @@ -50,12 +50,12 @@ const Epetra_Comm& AztecEpetraOperator::Comm() const { } const Epetra_Map& AztecEpetraOperator::OperatorDomainMap() const { - if(Teuchos::rcp_dynamic_cast >(xOp_) != Teuchos::null) { - RCP > A = Teuchos::rcp_dynamic_cast >(xOp_)->getJacobian(); - RCP > crsOp = rcp_dynamic_cast >(A); + if (Teuchos::rcp_dynamic_cast>(xOp_) != Teuchos::null) { + RCP> A = Teuchos::rcp_dynamic_cast>(xOp_)->getJacobian(); + RCP> crsOp = rcp_dynamic_cast>(A); if (crsOp == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"); - const RCP> &tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); + const RCP>& tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); if (tmp_ECrsMtx == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed"); return tmp_ECrsMtx->getEpetra_CrsMatrixNonConst()->DomainMap(); @@ -67,14 +67,13 @@ const Epetra_Map& AztecEpetraOperator::OperatorDomainMap() const { return Xpetra::toEpetra(map); } -const Epetra_Map & AztecEpetraOperator::OperatorRangeMap() const { - - if(Teuchos::rcp_dynamic_cast >(xOp_) != Teuchos::null) { - RCP > A = Teuchos::rcp_dynamic_cast >(xOp_)->getJacobian(); - RCP > crsOp = rcp_dynamic_cast >(A); +const Epetra_Map& AztecEpetraOperator::OperatorRangeMap() const { + if (Teuchos::rcp_dynamic_cast>(xOp_) != Teuchos::null) { + RCP> A = Teuchos::rcp_dynamic_cast>(xOp_)->getJacobian(); + RCP> crsOp = rcp_dynamic_cast>(A); if (crsOp == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"); - const RCP> &tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); + const RCP>& tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); if (tmp_ECrsMtx == Teuchos::null) throw 
Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed"); return tmp_ECrsMtx->getEpetra_CrsMatrixNonConst()->RangeMap(); @@ -86,7 +85,7 @@ const Epetra_Map & AztecEpetraOperator::OperatorRangeMap() const { return Xpetra::toEpetra(map); } -} +} // namespace MueLu #endif /*#if defined(HAVE_MUELU_SERIAL) and defined(HAVE_MUELU_EPETRA)*/ diff --git a/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.hpp b/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.hpp index 5a9fe3d7ef86..feca078f032e 100644 --- a/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.hpp +++ b/packages/muelu/adapters/aztecoo/MueLu_AztecEpetraOperator.hpp @@ -16,107 +16,104 @@ namespace MueLu { Currently only used for RefMaxwell. */ - class AztecEpetraOperator : public Epetra_Operator { - typedef double SC; - typedef int LO; - typedef int GO; - typedef Xpetra::EpetraNode NO; +class AztecEpetraOperator : public Epetra_Operator { + typedef double SC; + typedef int LO; + typedef int GO; + typedef Xpetra::EpetraNode NO; - typedef Xpetra::Map Map; - typedef Xpetra::EpetraMapT EpetraMap; - typedef Xpetra::Operator Operator; + typedef Xpetra::Map Map; + typedef Xpetra::EpetraMapT EpetraMap; + typedef Xpetra::Operator Operator; - public: + public: + //! @name Constructor/Destructor + //@{ - //! @name Constructor/Destructor - //@{ + //! Constructor + AztecEpetraOperator(const Teuchos::RCP& Op) + : xOp_(Op) {} - //! Constructor - AztecEpetraOperator(const Teuchos::RCP& Op) : xOp_(Op) { } + //! Destructor. + virtual ~AztecEpetraOperator() {} - //! Destructor. - virtual ~AztecEpetraOperator() { } + //@} - //@} + int SetUseTranspose(bool /* UseTransposeBool */) { return -1; } - int SetUseTranspose(bool /* UseTransposeBool */) { return -1; } + //! @name Mathematical functions + //@{ - //! @name Mathematical functions - //@{ + //! Returns the result of a Epetra_Operator applied to a Epetra_MultiVector X in Y. + /*! 
+ \param In + X - A Epetra_MultiVector of dimension NumVectors to multiply with matrix. + \param Out + Y -A Epetra_MultiVector of dimension NumVectors containing result. - //! Returns the result of a Epetra_Operator applied to a Epetra_MultiVector X in Y. - /*! - \param In - X - A Epetra_MultiVector of dimension NumVectors to multiply with matrix. - \param Out - Y -A Epetra_MultiVector of dimension NumVectors containing result. + \return Integer error code, set to 0 if successful. + */ + int Apply(const Epetra_MultiVector& /* X */, Epetra_MultiVector& /* Y */) const { return -1; } - \return Integer error code, set to 0 if successful. - */ - int Apply(const Epetra_MultiVector& /* X */, Epetra_MultiVector& /* Y */) const { return -1; } + //! Returns the result of a Epetra_Operator inverse applied to an Epetra_MultiVector X in Y. + /*! + \param In + X - A Epetra_MultiVector of dimension NumVectors to solve for. + \param Out + Y -A Epetra_MultiVector of dimension NumVectors containing result. - //! Returns the result of a Epetra_Operator inverse applied to an Epetra_MultiVector X in Y. - /*! - \param In - X - A Epetra_MultiVector of dimension NumVectors to solve for. - \param Out - Y -A Epetra_MultiVector of dimension NumVectors containing result. + \return Integer error code, set to 0 if successful. - \return Integer error code, set to 0 if successful. + \warning In order to work with AztecOO, any implementation of this method must + support the case where X and Y are the same object. + */ + int ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const; - \warning In order to work with AztecOO, any implementation of this method must - support the case where X and Y are the same object. - */ - int ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const; + //! Returns the infinity norm of the global matrix. + /* Returns the quantity \f$ \| A \|_\infty\f$ such that + \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f]. - //! 
Returns the infinity norm of the global matrix. - /* Returns the quantity \f$ \| A \|_\infty\f$ such that - \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f]. + \warning This method must not be called unless HasNormInf() returns true. + */ + double NormInf() const { return 0; } + //@} - \warning This method must not be called unless HasNormInf() returns true. - */ - double NormInf() const { return 0; } - //@} + //! @name Attribute access functions + //@{ - //! @name Attribute access functions - //@{ + //! Returns a character string describing the operator + const char* Label() const { return "MueLu::AztecEpetraOperator"; } - //! Returns a character string describing the operator - const char * Label() const { return "MueLu::AztecEpetraOperator"; } + //! Returns the current UseTranspose setting. + bool UseTranspose() const { return false; } - //! Returns the current UseTranspose setting. - bool UseTranspose() const { return false; } + //! Returns true if the \e this object can provide an approximate Inf-norm, false otherwise. + bool HasNormInf() const { return 0; } - //! Returns true if the \e this object can provide an approximate Inf-norm, false otherwise. - bool HasNormInf() const { return 0; } + //! Returns a pointer to the Epetra_Comm communicator associated with this operator. + const Epetra_Comm& Comm() const; - //! Returns a pointer to the Epetra_Comm communicator associated with this operator. - const Epetra_Comm & Comm() const; + //! Returns the Epetra_Map object associated with the domain of this operator. + const Epetra_Map& OperatorDomainMap() const; - //! Returns the Epetra_Map object associated with the domain of this operator. - const Epetra_Map & OperatorDomainMap() const; + //! Returns the Epetra_Map object associated with the range of this operator. + const Epetra_Map& OperatorRangeMap() const; - //! Returns the Epetra_Map object associated with the range of this operator. 
- const Epetra_Map & OperatorRangeMap() const; + //@} - //@} + //! @name MueLu specific + //@{ - //! @name MueLu specific - //@{ + //! Direct access to the underlying Xpetra::Operator. + Teuchos::RCP GetOperator() const { return xOp_; } - //! Direct access to the underlying Xpetra::Operator. - Teuchos::RCP GetOperator() const { return xOp_; } + //@} - //@} + private: + Teuchos::RCP xOp_; +}; - - private: - - Teuchos::RCP xOp_; - - }; - -} // namespace +} // namespace MueLu #endif diff --git a/packages/muelu/adapters/belos/BelosMueLuAdapter.hpp b/packages/muelu/adapters/belos/BelosMueLuAdapter.hpp index ca66967e24d9..1512269be928 100644 --- a/packages/muelu/adapters/belos/BelosMueLuAdapter.hpp +++ b/packages/muelu/adapters/belos/BelosMueLuAdapter.hpp @@ -67,397 +67,397 @@ #include "MueLu_Hierarchy.hpp" namespace Belos { - using Teuchos::RCP; - using Teuchos::rcpFromRef; +using Teuchos::RCP; +using Teuchos::rcpFromRef; - // - //! @name MueLu Adapter Exceptions - //@{ - - /** \brief MueLuOpFailure is thrown when a return value from an MueLu - * call on an Xpetra::Operator or MueLu::Hierarchy is non-zero. - */ - class MueLuOpFailure : public BelosError { - public: - MueLuOpFailure(const std::string& what_arg) : BelosError(what_arg) {} - }; - - /*! @class MueLuOp - * - * @brief MueLuOp derives from Belos::OperatorT and administrates a MueLu::Hierarchy. It implements the apply - * call which represents the effect of the multigrid preconditioner on a given vector. - * Note, in contrast to Belos::XpetraOp this operator has the multigrid hierarchy. - * - * The Belos::OperatorT class is a generalization of the Belos::Operator<> class, which - * deals with any kind of vector (not only Belos::MultiVec as the Belos::Operator<> interface does). - * - * This is the general implementation for Tpetra only. - */ - template - class MueLuOp : - public OperatorT > +// +//! 
@name MueLu Adapter Exceptions +//@{ + +/** \brief MueLuOpFailure is thrown when a return value from an MueLu + * call on an Xpetra::Operator or MueLu::Hierarchy is non-zero. + */ +class MueLuOpFailure : public BelosError { + public: + MueLuOpFailure(const std::string& what_arg) + : BelosError(what_arg) {} +}; + +/*! @class MueLuOp + * + * @brief MueLuOp derives from Belos::OperatorT and administrates a MueLu::Hierarchy. It implements the apply + * call which represents the effect of the multigrid preconditioner on a given vector. + * Note, in contrast to Belos::XpetraOp this operator has the multigrid hierarchy. + * + * The Belos::OperatorT class is a generalization of the Belos::Operator<> class, which + * deals with any kind of vector (not only Belos::MultiVec as the Belos::Operator<> interface does). + * + * This is the general implementation for Tpetra only. + */ +template +class MueLuOp : public OperatorT > #ifdef HAVE_XPETRA_TPETRA - , public OperatorT > + , + public OperatorT > #endif - { - public: - - //! @name Constructor/Destructor - //@{ +{ + public: + //! @name Constructor/Destructor + //@{ - //! Default constructor - MueLuOp(const RCP >&H) : Hierarchy_(H) {} + //! Default constructor + MueLuOp(const RCP >& H) + : Hierarchy_(H) {} #ifdef HAVE_MUELU_AMGX - MueLuOp(const RCP >& A) : AMGX_(A) {} + MueLuOp(const RCP >& A) + : AMGX_(A) {} #endif - //! Destructor. - virtual ~MueLuOp() {} - //@} - - //! @name Operator application method - //@{ + //! Destructor. + virtual ~MueLuOp() {} + //@} - /*! \brief This routine takes the Xpetra::MultiVector \c x and applies the operator - to it resulting in the Xpetra::MultiVector \c y, which is returned. - \note It is expected that any problem with applying this operator to \c x will be - indicated by an std::exception being thrown. - */ - void Apply(const Xpetra::MultiVector& x, Xpetra::MultiVector& y, ETrans trans = NOTRANS ) const { + //! 
@name Operator application method + //@{ - TEUCHOS_TEST_FOR_EXCEPTION(trans!=NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + /*! \brief This routine takes the Xpetra::MultiVector \c x and applies the operator + to it resulting in the Xpetra::MultiVector \c y, which is returned. + \note It is expected that any problem with applying this operator to \c x will be + indicated by an std::exception being thrown. + */ + void Apply(const Xpetra::MultiVector& x, Xpetra::MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - // This does not matter for Hierarchy, but matters for AMGX - y.putScalar(0.0); + // This does not matter for Hierarchy, but matters for AMGX + y.putScalar(0.0); #ifdef HAVE_MUELU_AMGX - if (!AMGX_.is_null()) { - Tpetra::MultiVector tX = Xpetra::toTpetra(x); - Tpetra::MultiVector tY = Xpetra::toTpetra(y); - - AMGX_->apply(tX, tY); + if (!AMGX_.is_null()) { + Tpetra::MultiVector tX = Xpetra::toTpetra(x); + Tpetra::MultiVector tY = Xpetra::toTpetra(y); - } -#endif - if (!Hierarchy_.is_null()) - Hierarchy_->Iterate(x, y, 1, true); + AMGX_->apply(tX, tY); } - //@} +#endif + if (!Hierarchy_.is_null()) + Hierarchy_->Iterate(x, y, 1, true); + } + //@} #ifdef HAVE_XPETRA_TPETRA - // TO SKIP THE TRAIT IMPLEMENTATION OF XPETRA::MULTIVECTOR - /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator - to it resulting in the Tpetra::MultiVector \c y, which is returned. - \note It is expected that any problem with applying this operator to \c x will be - indicated by an std::exception being thrown. 
- */ - void Apply(const Tpetra::MultiVector& x, Tpetra::MultiVector& y, ETrans trans = NOTRANS ) const { - - TEUCHOS_TEST_FOR_EXCEPTION(trans!=NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX - y.putScalar(0.0); + // TO SKIP THE TRAIT IMPLEMENTATION OF XPETRA::MULTIVECTOR + /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator + to it resulting in the Tpetra::MultiVector \c y, which is returned. + \note It is expected that any problem with applying this operator to \c x will be + indicated by an std::exception being thrown. + */ + void Apply(const Tpetra::MultiVector& x, Tpetra::MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + + // FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX + y.putScalar(0.0); #ifdef HAVE_MUELU_AMGX - if (!AMGX_.is_null()) - AMGX_->apply(x, y); + if (!AMGX_.is_null()) + AMGX_->apply(x, y); #endif - if (!Hierarchy_.is_null()) { - Tpetra::MultiVector & temp_x = const_cast &>(x); + if (!Hierarchy_.is_null()) { + Tpetra::MultiVector& temp_x = const_cast&>(x); - const Xpetra::TpetraMultiVector tX(rcpFromRef(temp_x)); - Xpetra::TpetraMultiVector tY(rcpFromRef(y)); - Hierarchy_->Iterate(tX, tY, 1, true); - } + const Xpetra::TpetraMultiVector tX(rcpFromRef(temp_x)); + Xpetra::TpetraMultiVector tY(rcpFromRef(y)); + Hierarchy_->Iterate(tX, tY, 1, true); } + } #endif - private: - RCP > Hierarchy_; + private: + RCP > Hierarchy_; #ifdef HAVE_MUELU_AMGX - RCP > AMGX_; + RCP > AMGX_; #endif - }; +}; #ifdef HAVE_XPETRA_EPETRA #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES - /*! 
@class MueLuOp - * - * @brief MueLuOp derives from Belos::OperatorT and administrates a MueLu::Hierarchy. It implements the apply - * call which represents the effect of the multigrid preconditioner on a given vector. - * Note, in contrast to Belos::XpetraOp this operator has the multigrid hierarchy. - * - * The Belos::OperatorT class is a generalization of the Belos::Operator<> class, which - * deals with any kind of vector (not only Belos::MultiVec as the Belos::Operator<> interface does). - * - * This is the specialization for - */ - template <> - class MueLuOp : - public OperatorT > +/*! @class MueLuOp + * + * @brief MueLuOp derives from Belos::OperatorT and administrates a MueLu::Hierarchy. It implements the apply + * call which represents the effect of the multigrid preconditioner on a given vector. + * Note, in contrast to Belos::XpetraOp this operator has the multigrid hierarchy. + * + * The Belos::OperatorT class is a generalization of the Belos::Operator<> class, which + * deals with any kind of vector (not only Belos::MultiVec as the Belos::Operator<> interface does). 
+ * + * This is the specialization for + */ +template <> +class MueLuOp : public OperatorT > #ifdef HAVE_XPETRA_TPETRA - // check whether Tpetra is instantiated on double,int,int,EpetraNode +// check whether Tpetra is instantiated on double,int,int,EpetraNode #if ((defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_INT))) || \ - (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT)))) - , public OperatorT > + (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT)))) + , + public OperatorT > #endif #endif #ifdef HAVE_XPETRA_EPETRA - , public OperatorT - , public Belos::Operator + , + public OperatorT, + public Belos::Operator #endif - { - typedef double Scalar; - typedef int LocalOrdinal; - typedef int GlobalOrdinal; - typedef Xpetra::EpetraNode Node; - - public: - - MueLuOp(const RCP >& H) : Hierarchy_(H) {} +{ + typedef double Scalar; + typedef int LocalOrdinal; + typedef int GlobalOrdinal; + typedef Xpetra::EpetraNode Node; + + public: + MueLuOp(const RCP >& H) + : Hierarchy_(H) {} #ifdef HAVE_MUELU_AMGX - MueLuOp(const RCP >& A) : AMGX_(A) {} + MueLuOp(const RCP >& A) + : AMGX_(A) {} #endif - virtual ~MueLuOp() {} - - void Apply(const Xpetra::MultiVector& x, Xpetra::MultiVector& y, ETrans trans = NOTRANS ) const { + virtual ~MueLuOp() {} - TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + void Apply(const Xpetra::MultiVector& x, Xpetra::MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX - y.putScalar(0.0); + // FIXME InitialGuessIsZero currently does nothing in 
MueLu::Hierarchy.Iterate(), but it matters for AMGX + y.putScalar(0.0); #ifdef HAVE_MUELU_AMGX - if (!AMGX_.is_null()) { - Tpetra::MultiVector tX = Xpetra::toTpetra(x); - Tpetra::MultiVector tY = Xpetra::toTpetra(y); + if (!AMGX_.is_null()) { + Tpetra::MultiVector tX = Xpetra::toTpetra(x); + Tpetra::MultiVector tY = Xpetra::toTpetra(y); - AMGX_->apply(tX, tY); - } -#endif - if (!Hierarchy_.is_null()) - Hierarchy_->Iterate(x, y, 1, true); + AMGX_->apply(tX, tY); } +#endif + if (!Hierarchy_.is_null()) + Hierarchy_->Iterate(x, y, 1, true); + } #ifdef HAVE_XPETRA_TPETRA #if ((defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_INT))) || \ - (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT)))) - void Apply ( const Tpetra::MultiVector& x, Tpetra::MultiVector& y, ETrans trans=NOTRANS ) const { - TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT)))) + void Apply(const Tpetra::MultiVector& x, Tpetra::MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX - y.putScalar(0.0); + // FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX + y.putScalar(0.0); #ifdef HAVE_MUELU_AMGX - if (!AMGX_.is_null()) - AMGX_->apply(x, y); + if (!AMGX_.is_null()) + AMGX_->apply(x, y); #endif - if (!Hierarchy_.is_null()) { - Tpetra::MultiVector & temp_x = const_cast &>(x); + if (!Hierarchy_.is_null()) { + Tpetra::MultiVector& temp_x = const_cast&>(x); - const Xpetra::TpetraMultiVector 
tX(rcpFromRef(temp_x)); - Xpetra::TpetraMultiVector tY(rcpFromRef(y)); + const Xpetra::TpetraMultiVector tX(rcpFromRef(temp_x)); + Xpetra::TpetraMultiVector tY(rcpFromRef(y)); - tY.putScalar(0.0); + tY.putScalar(0.0); - Hierarchy_->Iterate(tX, tY, 1, true); - } + Hierarchy_->Iterate(tX, tY, 1, true); } + } #endif #endif #ifdef HAVE_XPETRA_EPETRA - // TO SKIP THE TRAIT IMPLEMENTATION OF XPETRA::MULTIVECTOR - /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator - to it resulting in the Tpetra::MultiVector \c y, which is returned. - \note It is expected that any problem with applying this operator to \c x will be - indicated by an std::exception being thrown. - */ - void Apply(const Epetra_MultiVector& x, Epetra_MultiVector& y, ETrans trans = NOTRANS) const { - TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - - Epetra_MultiVector& temp_x = const_cast(x); - - const Xpetra::EpetraMultiVectorT tX(rcpFromRef(temp_x)); - Xpetra::EpetraMultiVectorT tY(rcpFromRef(y)); - - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(). - tY.putScalar(0.0); - - Hierarchy_->Iterate(tX, tY, 1, true); - } - - /*! \brief This routine takes the Belos::MultiVec \c x and applies the operator - to it resulting in the Belos::MultiVec \c y, which is returned. - \note It is expected that any problem with applying this operator to \c x will be - indicated by an std::exception being thrown. 
- */ - void Apply(const Belos::MultiVec& x, Belos::MultiVec& y, ETrans trans = NOTRANS ) const { - const Epetra_MultiVector* vec_x = dynamic_cast(&x); - Epetra_MultiVector* vec_y = dynamic_cast(&y); - - TEUCHOS_TEST_FOR_EXCEPTION(vec_x==NULL || vec_y==NULL, MueLuOpFailure, - "Belos::MueLuOp::Apply, x and/or y cannot be dynamic cast to an Epetra_MultiVector."); - - Apply(*vec_x, *vec_y, trans); - } + // TO SKIP THE TRAIT IMPLEMENTATION OF XPETRA::MULTIVECTOR + /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator + to it resulting in the Tpetra::MultiVector \c y, which is returned. + \note It is expected that any problem with applying this operator to \c x will be + indicated by an std::exception being thrown. + */ + void Apply(const Epetra_MultiVector& x, Epetra_MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + + Epetra_MultiVector& temp_x = const_cast(x); + + const Xpetra::EpetraMultiVectorT tX(rcpFromRef(temp_x)); + Xpetra::EpetraMultiVectorT tY(rcpFromRef(y)); + + // FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(). + tY.putScalar(0.0); + + Hierarchy_->Iterate(tX, tY, 1, true); + } + + /*! \brief This routine takes the Belos::MultiVec \c x and applies the operator + to it resulting in the Belos::MultiVec \c y, which is returned. + \note It is expected that any problem with applying this operator to \c x will be + indicated by an std::exception being thrown. 
+ */ + void Apply(const Belos::MultiVec& x, Belos::MultiVec& y, ETrans trans = NOTRANS) const { + const Epetra_MultiVector* vec_x = dynamic_cast(&x); + Epetra_MultiVector* vec_y = dynamic_cast(&y); + + TEUCHOS_TEST_FOR_EXCEPTION(vec_x == NULL || vec_y == NULL, MueLuOpFailure, + "Belos::MueLuOp::Apply, x and/or y cannot be dynamic cast to an Epetra_MultiVector."); + + Apply(*vec_x, *vec_y, trans); + } #endif - private: - RCP > Hierarchy_; + private: + RCP > Hierarchy_; #ifdef HAVE_MUELU_AMGX - RCP > AMGX_; + RCP > AMGX_; #endif - }; -#endif // !EPETRA_NO_32BIT_GLOBAL_INDICES -#endif // HAVE_XPETRA_EPETRA - +}; +#endif // !EPETRA_NO_32BIT_GLOBAL_INDICES +#endif // HAVE_XPETRA_EPETRA #ifdef HAVE_XPETRA_EPETRA -#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES - /*! @class MueLuOp - * - * @brief MueLuOp derives from Belos::OperatorT and administrates a MueLu::Hierarchy. It implements the apply - * call which represents the effect of the multigrid preconditioner on a given vector. - * Note, in contrast to Belos::XpetraOp this operator has the multigrid hierarchy. - * - * The Belos::OperatorT class is a generalization of the Belos::Operator<> class, which - * deals with any kind of vector (not only Belos::MultiVec as the Belos::Operator<> interface does). - * - * This is the specialization for - */ - template <> - class MueLuOp : - public OperatorT > +#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES +/*! @class MueLuOp + * + * @brief MueLuOp derives from Belos::OperatorT and administrates a MueLu::Hierarchy. It implements the apply + * call which represents the effect of the multigrid preconditioner on a given vector. + * Note, in contrast to Belos::XpetraOp this operator has the multigrid hierarchy. + * + * The Belos::OperatorT class is a generalization of the Belos::Operator<> class, which + * deals with any kind of vector (not only Belos::MultiVec as the Belos::Operator<> interface does). 
+ * + * This is the specialization for + */ +template <> +class MueLuOp : public OperatorT > #ifdef HAVE_XPETRA_TPETRA - // check whether Tpetra is instantiated on double,int,int,EpetraNode +// check whether Tpetra is instantiated on double,int,int,EpetraNode #if ((defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \ - (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG)))) - , public OperatorT > + (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG)))) + , + public OperatorT > #endif #endif #ifdef HAVE_XPETRA_EPETRA - , public OperatorT - , public Belos::Operator + , + public OperatorT, + public Belos::Operator #endif - { - typedef double Scalar; - typedef int LocalOrdinal; - typedef long long GlobalOrdinal; - typedef Xpetra::EpetraNode Node; - - public: - - MueLuOp(const RCP >& H) : Hierarchy_(H) {} +{ + typedef double Scalar; + typedef int LocalOrdinal; + typedef long long GlobalOrdinal; + typedef Xpetra::EpetraNode Node; + + public: + MueLuOp(const RCP >& H) + : Hierarchy_(H) {} #ifdef HAVE_MUELU_AMGX - MueLuOp(const RCP >& A) : AMGX_(A) {} + MueLuOp(const RCP >& A) + : AMGX_(A) {} #endif - virtual ~MueLuOp() {} + virtual ~MueLuOp() {} - void Apply(const Xpetra::MultiVector& x, Xpetra::MultiVector& y, ETrans trans = NOTRANS ) const { + void Apply(const Xpetra::MultiVector& x, Xpetra::MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX - y.putScalar(0.0); + // FIXME 
InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX + y.putScalar(0.0); #ifdef HAVE_MUELU_AMGX - if (!AMGX_.is_null()) { - Tpetra::MultiVector tX = Xpetra::toTpetra(x); - Tpetra::MultiVector tY = Xpetra::toTpetra(y); + if (!AMGX_.is_null()) { + Tpetra::MultiVector tX = Xpetra::toTpetra(x); + Tpetra::MultiVector tY = Xpetra::toTpetra(y); - AMGX_->apply(tX, tY); - } -#endif - if (!Hierarchy_.is_null()) - Hierarchy_->Iterate(x, y, 1, true); + AMGX_->apply(tX, tY); } +#endif + if (!Hierarchy_.is_null()) + Hierarchy_->Iterate(x, y, 1, true); + } #ifdef HAVE_XPETRA_TPETRA #if ((defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \ - (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG)))) - void Apply ( const Tpetra::MultiVector& x, Tpetra::MultiVector& y, ETrans trans=NOTRANS ) const { - TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + (!defined(EPETRA_HAVE_OMP) && (defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG)))) + void Apply(const Tpetra::MultiVector& x, Tpetra::MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX - y.putScalar(0.0); + // FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX + y.putScalar(0.0); #ifdef HAVE_MUELU_AMGX - if (!AMGX_.is_null()) - AMGX_->apply(x, y); + if (!AMGX_.is_null()) + AMGX_->apply(x, y); #endif - if (!Hierarchy_.is_null()) { - Tpetra::MultiVector & temp_x = const_cast &>(x); + if (!Hierarchy_.is_null()) { + Tpetra::MultiVector& temp_x = 
const_cast&>(x); - const Xpetra::TpetraMultiVector tX(rcpFromRef(temp_x)); - Xpetra::TpetraMultiVector tY(rcpFromRef(y)); + const Xpetra::TpetraMultiVector tX(rcpFromRef(temp_x)); + Xpetra::TpetraMultiVector tY(rcpFromRef(y)); - tY.putScalar(0.0); + tY.putScalar(0.0); - Hierarchy_->Iterate(tX, tY, 1, true); - } + Hierarchy_->Iterate(tX, tY, 1, true); } + } #endif #endif #ifdef HAVE_XPETRA_EPETRA - // TO SKIP THE TRAIT IMPLEMENTATION OF XPETRA::MULTIVECTOR - /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator - to it resulting in the Tpetra::MultiVector \c y, which is returned. - \note It is expected that any problem with applying this operator to \c x will be - indicated by an std::exception being thrown. - */ - void Apply(const Epetra_MultiVector& x, Epetra_MultiVector& y, ETrans trans = NOTRANS) const { - TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, - "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); - - Epetra_MultiVector& temp_x = const_cast(x); - - const Xpetra::EpetraMultiVectorT tX(rcpFromRef(temp_x)); - Xpetra::EpetraMultiVectorT tY(rcpFromRef(y)); - - //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(). - tY.putScalar(0.0); - - Hierarchy_->Iterate(tX, tY, 1, true); - } - - /*! \brief This routine takes the Belos::MultiVec \c x and applies the operator - to it resulting in the Belos::MultiVec \c y, which is returned. - \note It is expected that any problem with applying this operator to \c x will be - indicated by an std::exception being thrown. 
- */ - void Apply(const Belos::MultiVec& x, Belos::MultiVec& y, ETrans trans = NOTRANS ) const { - const Epetra_MultiVector* vec_x = dynamic_cast(&x); - Epetra_MultiVector* vec_y = dynamic_cast(&y); - - TEUCHOS_TEST_FOR_EXCEPTION(vec_x==NULL || vec_y==NULL, MueLuOpFailure, - "Belos::MueLuOp::Apply, x and/or y cannot be dynamic cast to an Epetra_MultiVector."); - - Apply(*vec_x, *vec_y, trans); - } + // TO SKIP THE TRAIT IMPLEMENTATION OF XPETRA::MULTIVECTOR + /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator + to it resulting in the Tpetra::MultiVector \c y, which is returned. + \note It is expected that any problem with applying this operator to \c x will be + indicated by an std::exception being thrown. + */ + void Apply(const Epetra_MultiVector& x, Epetra_MultiVector& y, ETrans trans = NOTRANS) const { + TEUCHOS_TEST_FOR_EXCEPTION(trans != NOTRANS, MueLuOpFailure, + "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners."); + + Epetra_MultiVector& temp_x = const_cast(x); + + const Xpetra::EpetraMultiVectorT tX(rcpFromRef(temp_x)); + Xpetra::EpetraMultiVectorT tY(rcpFromRef(y)); + + // FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(). + tY.putScalar(0.0); + + Hierarchy_->Iterate(tX, tY, 1, true); + } + + /*! \brief This routine takes the Belos::MultiVec \c x and applies the operator + to it resulting in the Belos::MultiVec \c y, which is returned. + \note It is expected that any problem with applying this operator to \c x will be + indicated by an std::exception being thrown. 
+ */ + void Apply(const Belos::MultiVec& x, Belos::MultiVec& y, ETrans trans = NOTRANS) const { + const Epetra_MultiVector* vec_x = dynamic_cast(&x); + Epetra_MultiVector* vec_y = dynamic_cast(&y); + + TEUCHOS_TEST_FOR_EXCEPTION(vec_x == NULL || vec_y == NULL, MueLuOpFailure, + "Belos::MueLuOp::Apply, x and/or y cannot be dynamic cast to an Epetra_MultiVector."); + + Apply(*vec_x, *vec_y, trans); + } #endif - private: - RCP > Hierarchy_; + private: + RCP > Hierarchy_; #ifdef HAVE_MUELU_AMGX - RCP > AMGX_; + RCP > AMGX_; #endif - }; -#endif // !EPETRA_NO_64BIT_GLOBAL_INDICES -#endif // HAVE_XPETRA_EPETRA -} // namespace Belos +}; +#endif // !EPETRA_NO_64BIT_GLOBAL_INDICES +#endif // HAVE_XPETRA_EPETRA +} // namespace Belos -#endif // BELOS_MUELU_ADAPTER_HPP +#endif // BELOS_MUELU_ADAPTER_HPP diff --git a/packages/muelu/adapters/belos/BelosXpetraStatusTestGenResSubNorm.hpp b/packages/muelu/adapters/belos/BelosXpetraStatusTestGenResSubNorm.hpp index 6f3f401041c2..355d2e346eea 100644 --- a/packages/muelu/adapters/belos/BelosXpetraStatusTestGenResSubNorm.hpp +++ b/packages/muelu/adapters/belos/BelosXpetraStatusTestGenResSubNorm.hpp @@ -59,27 +59,25 @@ #include #include - namespace Belos { /*! \brief Template specialization of Belos::StatusTestGenResSubNorm class using the * Xpetra::MultiVector and Belos::OperatorT MueLu adapter class. 
*/ template -class StatusTestGenResSubNorm,Belos::OperatorT > > - : public StatusTestResNorm,Belos::OperatorT > > { - +class StatusTestGenResSubNorm, Belos::OperatorT > > + : public StatusTestResNorm, Belos::OperatorT > > { public: // Convenience typedefs - typedef Xpetra::MultiVector MV; - typedef Xpetra::BlockedCrsMatrix BCRS; - typedef Xpetra::MapExtractor ME; + typedef Xpetra::MultiVector MV; + typedef Xpetra::BlockedCrsMatrix BCRS; + typedef Xpetra::MapExtractor ME; typedef Belos::OperatorT OP; typedef Teuchos::ScalarTraits SCT; typedef typename SCT::magnitudeType MagnitudeType; - typedef MultiVecTraits MVT; - typedef OperatorTraits OT; + typedef MultiVecTraits MVT; + typedef OperatorTraits OT; //! @name Constructors/destructors. //@{ @@ -96,27 +94,27 @@ class StatusTestGenResSubNorm::one ()), - status_(Undefined), - curBlksz_(0), - curNumRHS_(0), - curLSNum_(0), - numrhs_(0), - firstcallCheckStatus_(true), - firstcallDefineResForm_(true), - firstcallDefineScaleForm_(true), - mapExtractor_(Teuchos::null) { } + StatusTestGenResSubNorm(MagnitudeType Tolerance, size_t subIdx, int quorum = -1, bool showMaxResNormOnly = false) + : tolerance_(Tolerance) + , subIdx_(subIdx) + , quorum_(quorum) + , showMaxResNormOnly_(showMaxResNormOnly) + , resnormtype_(TwoNorm) + , scaletype_(NormOfInitRes) + , scalenormtype_(TwoNorm) + , scalevalue_(Teuchos::ScalarTraits::one()) + , status_(Undefined) + , curBlksz_(0) + , curNumRHS_(0) + , curLSNum_(0) + , numrhs_(0) + , firstcallCheckStatus_(true) + , firstcallDefineResForm_(true) + , firstcallDefineScaleForm_(true) + , mapExtractor_(Teuchos::null) {} //! Destructor - virtual ~StatusTestGenResSubNorm() { }; + virtual ~StatusTestGenResSubNorm(){}; //@} //! @name Form and parameter definition methods. 
@@ -130,13 +128,13 @@ class StatusTestGenResSubNorm */ int defineResForm(NormType TypeOfNorm) { - TEUCHOS_TEST_FOR_EXCEPTION(firstcallDefineResForm_==false,StatusTestError, - "StatusTestGenResSubNorm::defineResForm(): The residual form has already been defined."); + TEUCHOS_TEST_FOR_EXCEPTION(firstcallDefineResForm_ == false, StatusTestError, + "StatusTestGenResSubNorm::defineResForm(): The residual form has already been defined."); firstcallDefineResForm_ = false; resnormtype_ = TypeOfNorm; - return(0); + return (0); } //! Define form of the scaling, its norm, its optional weighting std::vector, or, alternatively, define an explicit value. @@ -160,35 +158,47 @@ class StatusTestGenResSubNorm */ - int defineScaleForm( ScaleType TypeOfScaling, NormType TypeOfNorm, MagnitudeType ScaleValue = Teuchos::ScalarTraits::one()) { - TEUCHOS_TEST_FOR_EXCEPTION(firstcallDefineScaleForm_==false,StatusTestError, - "StatusTestGenResSubNorm::defineScaleForm(): The scaling type has already been defined."); + int defineScaleForm(ScaleType TypeOfScaling, NormType TypeOfNorm, MagnitudeType ScaleValue = Teuchos::ScalarTraits::one()) { + TEUCHOS_TEST_FOR_EXCEPTION(firstcallDefineScaleForm_ == false, StatusTestError, + "StatusTestGenResSubNorm::defineScaleForm(): The scaling type has already been defined."); firstcallDefineScaleForm_ = false; - scaletype_ = TypeOfScaling; + scaletype_ = TypeOfScaling; scalenormtype_ = TypeOfNorm; - scalevalue_ = ScaleValue; + scalevalue_ = ScaleValue; - return(0); + return (0); } //! Set the value of the tolerance /*! We allow the tolerance to be reset for cases where, in the process of testing the residual, we find that the initial tolerance was too tight or too lax. */ - int setTolerance(MagnitudeType tolerance) {tolerance_ = tolerance; return(0);} + int setTolerance(MagnitudeType tolerance) { + tolerance_ = tolerance; + return (0); + } //! Set the block index of which we want to check the norm of the sub-residuals /*! 
It does not really make sense to change/reset the index during the solution process */ - int setSubIdx ( size_t subIdx ) { subIdx_ = subIdx; return(0);} + int setSubIdx(size_t subIdx) { + subIdx_ = subIdx; + return (0); + } //! Sets the number of residuals that must pass the convergence test before Passed is returned. //! \note If \c quorum=-1 then all residuals must pass the convergence test before Passed is returned. - int setQuorum(int quorum) {quorum_ = quorum; return(0);} + int setQuorum(int quorum) { + quorum_ = quorum; + return (0); + } //! Set whether the only maximum residual norm is displayed when the print() method is called - int setShowMaxResNormOnly(bool showMaxResNormOnly) {showMaxResNormOnly_ = showMaxResNormOnly; return(0);} + int setShowMaxResNormOnly(bool showMaxResNormOnly) { + showMaxResNormOnly_ = showMaxResNormOnly; + return (0); + } //@} @@ -201,55 +211,57 @@ class StatusTestGenResSubNorm* iSolver) { - MagnitudeType zero = Teuchos::ScalarTraits::zero(); - const LinearProblem& lp = iSolver->getProblem(); + StatusType checkStatus(Iteration* iSolver) { + MagnitudeType zero = Teuchos::ScalarTraits::zero(); + const LinearProblem& lp = iSolver->getProblem(); // Compute scaling term (done once for each block that's being solved) if (firstcallCheckStatus_) { StatusType status = firstCallCheckStatusSetup(iSolver); - if(status==Failed) { + if (status == Failed) { status_ = Failed; - return(status_); + return (status_); } } // // This section computes the norm of the residual std::vector // - if ( curLSNum_ != lp.getLSNumber() ) { + if (curLSNum_ != lp.getLSNumber()) { // // We have moved on to the next rhs block // - curLSNum_ = lp.getLSNumber(); - curLSIdx_ = lp.getLSIndex(); - curBlksz_ = (int)curLSIdx_.size(); + curLSNum_ = lp.getLSNumber(); + curLSIdx_ = lp.getLSIndex(); + curBlksz_ = (int)curLSIdx_.size(); int validLS = 0; - for (int i=0; i -1 && curLSIdx_[i] < numrhs_) validLS++; } curNumRHS_ = validLS; - curSoln_ = Teuchos::null; + curSoln_ = 
Teuchos::null; // } else { // // We are in the same rhs block, return if we are converged // - if (status_==Passed) { return status_; } + if (status_ == Passed) { + return status_; + } } // // Request the true residual for this block of right-hand sides. // Teuchos::RCP cur_update = iSolver->getCurrentUpdate(); - curSoln_ = lp.updateSolution( cur_update ); - Teuchos::RCP cur_res = MVT::Clone( *curSoln_, MVT::GetNumberVecs( *curSoln_ ) ); - lp.computeCurrResVec( &*cur_res, &*curSoln_ ); - std::vector tmp_resvector( MVT::GetNumberVecs( *cur_res ) ); - MvSubNorm( *cur_res, subIdx_, tmp_resvector, resnormtype_ ); + curSoln_ = lp.updateSolution(cur_update); + Teuchos::RCP cur_res = MVT::Clone(*curSoln_, MVT::GetNumberVecs(*curSoln_)); + lp.computeCurrResVec(&*cur_res, &*curSoln_); + std::vector tmp_resvector(MVT::GetNumberVecs(*cur_res)); + MvSubNorm(*cur_res, subIdx_, tmp_resvector, resnormtype_); typename std::vector::iterator p = curLSIdx_.begin(); - for (int i=0; p 0 ) { + if (scalevector_.size() > 0) { typename std::vector::iterator pp = curLSIdx_.begin(); - for (; pp::iterator pp = curLSIdx_.begin(); - for (; pp::iterator p2 = curLSIdx_.begin(); - for (; p2 tolerance_) { + if (testvector_[*p2] > tolerance_) { // do nothing. - } else if (testvector_[ *p2 ] == Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { + } else if (testvector_[*p2] == Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { reset(); - } else if (testvector_[ *p2 ] <= tolerance_) { + } else if (testvector_[*p2] <= tolerance_) { ind_[have] = *p2; have++; } else { // Throw an std::exception if a NaN is found. status_ = Failed; - TEUCHOS_TEST_FOR_EXCEPTION(true,StatusTestError,"StatusTestGenResSubNorm::checkStatus(): NaN has been detected."); + TEUCHOS_TEST_FOR_EXCEPTION(true, StatusTestError, "StatusTestGenResSubNorm::checkStatus(): NaN has been detected."); } } } ind_.resize(have); - int need = (quorum_ == -1) ? curNumRHS_: quorum_; - status_ = (have >= need) ? 
Passed : Failed; + int need = (quorum_ == -1) ? curNumRHS_ : quorum_; + status_ = (have >= need) ? Passed : Failed; // Return the current status return status_; } //! Return the result of the most recent CheckStatus call. - StatusType getStatus() const {return(status_);}; + StatusType getStatus() const { return (status_); }; //@} //! @name Reset methods @@ -320,14 +331,14 @@ class StatusTestGenResSubNorm " ) << tolerance_ << std::endl; - } - else { - for ( int i=0; i ") << tolerance_ << std::endl; + } else { + for (int i = 0; i < numrhs_; i++) { + for (int j = 0; j < indent + 13; j++) os << ' '; - os << "residual [ " << i << " ] = " << testvector_[ i ]; - os << ((testvector_[i]tolerance_) ? " > " : " " ) << tolerance_ << std::endl; + os << "residual [ " << i << " ] = " << testvector_[i]; + os << ((testvector_[i] < tolerance_) ? " < " : (testvector_[i] == tolerance_) ? " == " + : (testvector_[i] > tolerance_) ? " > " + : " ") + << tolerance_ << std::endl; } } } @@ -371,19 +383,19 @@ class StatusTestGenResSubNorm convIndices() { return ind_; } //! Returns the value of the tolerance, \f$ \tau \f$, set in the constructor. - MagnitudeType getTolerance() const {return(tolerance_);}; + MagnitudeType getTolerance() const { return (tolerance_); }; //! Returns the test value, \f$ \frac{\|r\|}{\sigma} \f$, computed in most recent call to CheckStatus. - const std::vector* getTestValue() const {return(&testvector_);}; + const std::vector* getTestValue() const { return (&testvector_); }; //! Returns the residual norm value, \f$ \|r\| \f$, computed in most recent call to CheckStatus. - const std::vector* getResNormValue() const {return(&resvector_);}; + const std::vector* getResNormValue() const { return (&resvector_); }; //! Returns the scaled norm value, \f$ \sigma \f$. - const std::vector* getScaledNormValue() const {return(&scalevector_);}; + const std::vector* getScaledNormValue() const { return (&scalevector_); }; //! 
Returns a boolean indicating a loss of accuracy has been detected in computing the residual. //! \note This status test does not check for loss of accuracy, so this method will always return false. @@ -424,7 +436,6 @@ class StatusTestGenResSubNormgetScaledNormValue() can be called * to get the scaling std::vector. */ - StatusType firstCallCheckStatusSetup(Iteration* iSolver) { + StatusType firstCallCheckStatusSetup(Iteration* iSolver) { int i; - MagnitudeType zero = Teuchos::ScalarTraits::zero(); - MagnitudeType one = Teuchos::ScalarTraits::one(); - const LinearProblem& lp = iSolver->getProblem(); + MagnitudeType zero = Teuchos::ScalarTraits::zero(); + MagnitudeType one = Teuchos::ScalarTraits::one(); + const LinearProblem& lp = iSolver->getProblem(); // Compute scaling term (done once for each block that's being solved) if (firstcallCheckStatus_) { // @@ -447,9 +458,9 @@ class StatusTestGenResSubNorm Op = lp.getOperator(); - Teuchos::RCP > xOp = - Teuchos::rcp_dynamic_cast >(Op); - TEUCHOS_TEST_FOR_EXCEPTION(xOp.is_null(), MueLu::Exceptions::BadCast, "Bad cast from \'const Belos::OperatorT\' to \'const Belos::XpetraOp\'. The origin type is " << typeid(const OP).name() << "."); + Teuchos::RCP > xOp = + Teuchos::rcp_dynamic_cast >(Op); + TEUCHOS_TEST_FOR_EXCEPTION(xOp.is_null(), MueLu::Exceptions::BadCast, "Bad cast from \'const Belos::OperatorT\' to \'const Belos::XpetraOp\'. The origin type is " << typeid(const OP).name() << "."); Teuchos::RCP > xIntOp = xOp->getOperator(); TEUCHOS_TEST_FOR_EXCEPTION(xIntOp.is_null(), MueLu::Exceptions::BadCast, "Cannot access Xpetra::Operator stored in Belos::XpetraOperator."); @@ -457,77 +468,72 @@ class StatusTestGenResSubNorm >(xIntOp); TEUCHOS_TEST_FOR_EXCEPTION(xMat.is_null(), MueLu::Exceptions::RuntimeError, "Cannot access Xpetra::Matrix stored in Belos::XpetraOp. 
Error."); Teuchos::RCP > bMat = Teuchos::rcp_dynamic_cast >(xMat); - TEUCHOS_TEST_FOR_EXCEPTION(bMat.is_null(), MueLu::Exceptions::BadCast, "Bad cast from \'const Xpetra::Matrix\' to \'const Xpetra::BlockedCrsMatrix\'. The origin type is " << typeid(const Xpetra::Matrix).name() << ". Note: you need a BlockedCrsMatrix object for the StatusTestGenResSubNorm to work!"); + TEUCHOS_TEST_FOR_EXCEPTION(bMat.is_null(), MueLu::Exceptions::BadCast, "Bad cast from \'const Xpetra::Matrix\' to \'const Xpetra::BlockedCrsMatrix\'. The origin type is " << typeid(const Xpetra::Matrix).name() << ". Note: you need a BlockedCrsMatrix object for the StatusTestGenResSubNorm to work!"); mapExtractor_ = bMat->getRangeMapExtractor(); TEUCHOS_TEST_FOR_EXCEPTION(mapExtractor_.is_null(), MueLu::Exceptions::RuntimeError, "Could not extract map extractor from BlockedCrsMatrix. Error."); - TEUCHOS_TEST_FOR_EXCEPTION(mapExtractor_->NumMaps()<=subIdx_, MueLu::Exceptions::RuntimeError, "The multivector is only split into " << mapExtractor_->NumMaps() << " sub parts. Cannot access sub-block " << subIdx_ << "."); + TEUCHOS_TEST_FOR_EXCEPTION(mapExtractor_->NumMaps() <= subIdx_, MueLu::Exceptions::RuntimeError, "The multivector is only split into " << mapExtractor_->NumMaps() << " sub parts. 
Cannot access sub-block " << subIdx_ << "."); // calculate initial norms - if (scaletype_== NormOfRHS) { + if (scaletype_ == NormOfRHS) { Teuchos::RCP rhs = lp.getRHS(); - numrhs_ = MVT::GetNumberVecs( *rhs ); - scalevector_.resize( numrhs_ ); - MvSubNorm( *rhs, subIdx_, scalevector_, scalenormtype_ ); - } - else if (scaletype_==NormOfInitRes) { + numrhs_ = MVT::GetNumberVecs(*rhs); + scalevector_.resize(numrhs_); + MvSubNorm(*rhs, subIdx_, scalevector_, scalenormtype_); + } else if (scaletype_ == NormOfInitRes) { Teuchos::RCP init_res = lp.getInitResVec(); - numrhs_ = MVT::GetNumberVecs( *init_res ); - scalevector_.resize( numrhs_ ); - MvSubNorm( *init_res, subIdx_, scalevector_, scalenormtype_ ); - } - else if (scaletype_==NormOfPrecInitRes) { + numrhs_ = MVT::GetNumberVecs(*init_res); + scalevector_.resize(numrhs_); + MvSubNorm(*init_res, subIdx_, scalevector_, scalenormtype_); + } else if (scaletype_ == NormOfPrecInitRes) { Teuchos::RCP init_res = lp.getInitPrecResVec(); - numrhs_ = MVT::GetNumberVecs( *init_res ); - scalevector_.resize( numrhs_ ); - MvSubNorm( *init_res, subIdx_, scalevector_, scalenormtype_ ); - } - else if (scaletype_==NormOfFullInitRes) { + numrhs_ = MVT::GetNumberVecs(*init_res); + scalevector_.resize(numrhs_); + MvSubNorm(*init_res, subIdx_, scalevector_, scalenormtype_); + } else if (scaletype_ == NormOfFullInitRes) { Teuchos::RCP init_res = lp.getInitResVec(); - numrhs_ = MVT::GetNumberVecs( *init_res ); - scalevector_.resize( numrhs_ ); - MVT::MvNorm( *init_res, scalevector_, scalenormtype_ ); + numrhs_ = MVT::GetNumberVecs(*init_res); + scalevector_.resize(numrhs_); + MVT::MvNorm(*init_res, scalevector_, scalenormtype_); scalevalue_ = one; - } - else if (scaletype_==NormOfFullPrecInitRes) { + } else if (scaletype_ == NormOfFullPrecInitRes) { Teuchos::RCP init_res = lp.getInitPrecResVec(); - numrhs_ = MVT::GetNumberVecs( *init_res ); - scalevector_.resize( numrhs_ ); - MVT::MvNorm( *init_res, scalevector_, scalenormtype_ ); + numrhs_ = 
MVT::GetNumberVecs(*init_res); + scalevector_.resize(numrhs_); + MVT::MvNorm(*init_res, scalevector_, scalenormtype_); scalevalue_ = one; - } - else if (scaletype_==NormOfFullScaledInitRes) { + } else if (scaletype_ == NormOfFullScaledInitRes) { Teuchos::RCP init_res = lp.getInitResVec(); - numrhs_ = MVT::GetNumberVecs( *init_res ); - scalevector_.resize( numrhs_ ); - MVT::MvNorm( *init_res, scalevector_, scalenormtype_ ); - MvScalingRatio( *init_res, subIdx_, scalevalue_ ); - } - else if (scaletype_==NormOfFullScaledPrecInitRes) { + numrhs_ = MVT::GetNumberVecs(*init_res); + scalevector_.resize(numrhs_); + MVT::MvNorm(*init_res, scalevector_, scalenormtype_); + MvScalingRatio(*init_res, subIdx_, scalevalue_); + } else if (scaletype_ == NormOfFullScaledPrecInitRes) { Teuchos::RCP init_res = lp.getInitPrecResVec(); - numrhs_ = MVT::GetNumberVecs( *init_res ); - scalevector_.resize( numrhs_ ); - MVT::MvNorm( *init_res, scalevector_, scalenormtype_ ); - MvScalingRatio( *init_res, subIdx_, scalevalue_ ); - } - else { - numrhs_ = MVT::GetNumberVecs( *(lp.getRHS()) ); + numrhs_ = MVT::GetNumberVecs(*init_res); + scalevector_.resize(numrhs_); + MVT::MvNorm(*init_res, scalevector_, scalenormtype_); + MvScalingRatio(*init_res, subIdx_, scalevalue_); + } else { + numrhs_ = MVT::GetNumberVecs(*(lp.getRHS())); } - resvector_.resize( numrhs_ ); - testvector_.resize( numrhs_ ); + resvector_.resize(numrhs_); + testvector_.resize(numrhs_); - curLSNum_ = lp.getLSNumber(); - curLSIdx_ = lp.getLSIndex(); - curBlksz_ = (int)curLSIdx_.size(); + curLSNum_ = lp.getLSNumber(); + curLSIdx_ = lp.getLSIndex(); + curBlksz_ = (int)curLSIdx_.size(); int validLS = 0; - for (i=0; i -1 && curLSIdx_[i] < numrhs_) validLS++; } curNumRHS_ = validLS; // // Initialize the testvector. 
- for (i=0; i: " << resFormStr(); oss << ", tol = " << tolerance_; @@ -552,43 +557,41 @@ class StatusTestGenResSubNorm::magnitudeType>& normVec, NormType type = TwoNorm) { - + void MvSubNorm(const MV& mv, size_t block, std::vector::magnitudeType>& normVec, NormType type = TwoNorm) { Teuchos::RCP input = Teuchos::rcpFromRef(mv); Teuchos::RCP SubVec = mapExtractor_->ExtractVector(input, block); - MVT::MvNorm(*SubVec,normVec,type); + MVT::MvNorm(*SubVec, normVec, type); } // calculate ration of sub-vector length to full vector length (for scalevalue_) - void MvScalingRatio( const MV& mv, size_t block, MagnitudeType& lengthRatio) { + void MvScalingRatio(const MV& mv, size_t block, MagnitudeType& lengthRatio) { Teuchos::RCP input = Teuchos::rcpFromRef(mv); Teuchos::RCP SubVec = mapExtractor_->ExtractVector(input, block); @@ -697,9 +699,8 @@ class StatusTestGenResSubNorm mapExtractor_; //@} - }; -} // namespace Belos +} // namespace Belos #endif /* BELOS_XPETRA_STATUS_TEST_GEN_RES_SUB_NORM_HPP */ diff --git a/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.cpp b/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.cpp index 8d5ed9dbdb8b..6536d40be90b 100644 --- a/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.cpp +++ b/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.cpp @@ -22,108 +22,104 @@ #if defined(HAVE_MUELU_EPETRA) namespace MueLu { - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Epetra. - @ingroup MueLuAdapters - Given a EpetraCrs_Matrix, this function returns a constructed MueLu preconditioner. 
- @param[in] inA Matrix - @param[in] paramListIn Parameter list - */ - Teuchos::RCP - CreateEpetraPreconditioner(const Teuchos::RCP& inA, - // FIXME: why is it non-const - Teuchos::ParameterList& paramListIn) - { - using SC = double; - using LO = int; - using GO = int; - using NO = Xpetra::EpetraNode; - - using Teuchos::ParameterList; - - using MultiVector = Xpetra::MultiVector; - using Matrix = Xpetra::Matrix; - using Hierarchy = Hierarchy; - using HierarchyManager = HierarchyManager; - - Teuchos::ParameterList& userList = paramListIn.sublist("user data"); - if (userList.isParameter("Coordinates")) { - RCP::coordinateType,LO,GO,NO> > coordinates = Teuchos::null; - try { - coordinates = EpetraMultiVector_To_XpetraMultiVector::coordinateType,LO,GO,NO>(userList.get >("Coordinates")); - } catch(Teuchos::Exceptions::InvalidParameterType&) { - coordinates = userList.get::coordinateType, LO, GO, NO> > >("Coordinates"); - } - if(Teuchos::nonnull(coordinates)){ - userList.set::coordinateType,LO,GO,NO> > >("Coordinates", coordinates); - } +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Epetra. + @ingroup MueLuAdapters + Given a EpetraCrs_Matrix, this function returns a constructed MueLu preconditioner. 
+ @param[in] inA Matrix + @param[in] paramListIn Parameter list + */ +Teuchos::RCP +CreateEpetraPreconditioner(const Teuchos::RCP& inA, + // FIXME: why is it non-const + Teuchos::ParameterList& paramListIn) { + using SC = double; + using LO = int; + using GO = int; + using NO = Xpetra::EpetraNode; + + using Teuchos::ParameterList; + + using MultiVector = Xpetra::MultiVector; + using Matrix = Xpetra::Matrix; + using Hierarchy = Hierarchy; + using HierarchyManager = HierarchyManager; + + Teuchos::ParameterList& userList = paramListIn.sublist("user data"); + if (userList.isParameter("Coordinates")) { + RCP::coordinateType, LO, GO, NO> > coordinates = Teuchos::null; + try { + coordinates = EpetraMultiVector_To_XpetraMultiVector::coordinateType, LO, GO, NO>(userList.get >("Coordinates")); + } catch (Teuchos::Exceptions::InvalidParameterType&) { + coordinates = userList.get::coordinateType, LO, GO, NO> > >("Coordinates"); } - if (userList.isParameter("Nullspace")) { - RCP::coordinateType,LO,GO,NO> > nullspace = Teuchos::null; - try { - nullspace = EpetraMultiVector_To_XpetraMultiVector(userList.get >("Nullspace")); - } catch(Teuchos::Exceptions::InvalidParameterType&) { - nullspace = userList.get > >("Nullspace"); - } - if(Teuchos::nonnull(nullspace)){ - userList.set::coordinateType,LO,GO,NO> > >("Nullspace", nullspace); - } + if (Teuchos::nonnull(coordinates)) { + userList.set::coordinateType, LO, GO, NO> > >("Coordinates", coordinates); } - - RCP A = EpetraCrs_To_XpetraMatrix(inA); - RCP H = MueLu::CreateXpetraPreconditioner(A, paramListIn); - return rcp(new EpetraOperator(H)); - } - - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Epetra. - @ingroup MueLuAdapters - Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. - @param[in] inA Matrix - @param[in] xmlFileName XML file containing MueLu options. 
- */ - Teuchos::RCP - CreateEpetraPreconditioner(const Teuchos::RCP & A, - const std::string& xmlFileName) - { - Teuchos::ParameterList paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *Xpetra::toXpetra(A->Comm())); - - return CreateEpetraPreconditioner(A, paramList); } - - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Epetra. - @ingroup MueLuAdapters - Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. - @param[in] inA Matrix. - */ - Teuchos::RCP - CreateEpetraPreconditioner(const Teuchos::RCP & A) - { - Teuchos::ParameterList paramList; - return CreateEpetraPreconditioner(A, paramList); - } - - void ReuseEpetraPreconditioner(const Teuchos::RCP& inA, MueLu::EpetraOperator& Op) { - using SC = double; - using LO = int; - using GO = int; - using NO = Xpetra::EpetraNode; - - using Teuchos::ParameterList; - - using Matrix = Xpetra::Matrix; - using Hierarchy = Hierarchy; - - RCP H = Op.GetHierarchy(); - RCP A = EpetraCrs_To_XpetraMatrix(inA); - - MueLu::ReuseXpetraPreconditioner(A, H); + if (userList.isParameter("Nullspace")) { + RCP::coordinateType, LO, GO, NO> > nullspace = Teuchos::null; + try { + nullspace = EpetraMultiVector_To_XpetraMultiVector(userList.get >("Nullspace")); + } catch (Teuchos::Exceptions::InvalidParameterType&) { + nullspace = userList.get > >("Nullspace"); + } + if (Teuchos::nonnull(nullspace)) { + userList.set::coordinateType, LO, GO, NO> > >("Nullspace", nullspace); + } } - -} //namespace -#endif // HAVE_MUELU_SERIAL and HAVE_MUELU_EPETRA - -#endif //ifndef MUELU_CREATE_EPETRA_PRECONDITIONER_CPP + RCP A = EpetraCrs_To_XpetraMatrix(inA); + RCP H = MueLu::CreateXpetraPreconditioner(A, paramListIn); + return rcp(new EpetraOperator(H)); +} + +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Epetra. 
+ @ingroup MueLuAdapters + Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. + @param[in] inA Matrix + @param[in] xmlFileName XML file containing MueLu options. + */ +Teuchos::RCP +CreateEpetraPreconditioner(const Teuchos::RCP& A, + const std::string& xmlFileName) { + Teuchos::ParameterList paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *Xpetra::toXpetra(A->Comm())); + + return CreateEpetraPreconditioner(A, paramList); +} + +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Epetra. + @ingroup MueLuAdapters + Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. + @param[in] inA Matrix. + */ +Teuchos::RCP +CreateEpetraPreconditioner(const Teuchos::RCP& A) { + Teuchos::ParameterList paramList; + return CreateEpetraPreconditioner(A, paramList); +} + +void ReuseEpetraPreconditioner(const Teuchos::RCP& inA, MueLu::EpetraOperator& Op) { + using SC = double; + using LO = int; + using GO = int; + using NO = Xpetra::EpetraNode; + + using Teuchos::ParameterList; + + using Matrix = Xpetra::Matrix; + using Hierarchy = Hierarchy; + + RCP H = Op.GetHierarchy(); + RCP A = EpetraCrs_To_XpetraMatrix(inA); + + MueLu::ReuseXpetraPreconditioner(A, H); +} + +} // namespace MueLu +#endif // HAVE_MUELU_SERIAL and HAVE_MUELU_EPETRA + +#endif // ifndef MUELU_CREATE_EPETRA_PRECONDITIONER_CPP diff --git a/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.hpp b/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.hpp index 9ffb14c17f45..a06a788afc47 100644 --- a/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.hpp +++ b/packages/muelu/adapters/epetra/MueLu_CreateEpetraPreconditioner.hpp @@ -15,43 +15,42 @@ #if defined(HAVE_MUELU_EPETRA) namespace MueLu { - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Epetra. 
- @ingroup MueLuAdapters - Given a EpetraCrs_Matrix, this function returns a constructed MueLu preconditioner. - @param[in] inA Matrix - @param[in] paramListIn Parameter list - */ - Teuchos::RCP - CreateEpetraPreconditioner(const Teuchos::RCP& inA, - // FIXME: why is it non-const - Teuchos::ParameterList& paramListIn); - - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Epetra. - @ingroup MueLuAdapters - Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. - @param[in] inA Matrix - @param[in] xmlFileName XML file containing MueLu options. - */ - Teuchos::RCP - CreateEpetraPreconditioner(const Teuchos::RCP & A, - const std::string& xmlFileName); - - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Epetra. - @ingroup MueLuAdapters - Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. - @param[in] inA Matrix - */ - Teuchos::RCP - CreateEpetraPreconditioner(const Teuchos::RCP & A, - const std::string& xmlFileName); - - void ReuseEpetraPreconditioner(const Teuchos::RCP& inA, MueLu::EpetraOperator& Op); - - -} //namespace -#endif // HAVE_MUELU_SERIAL and HAVE_MUELU_EPETRA - -#endif //ifndef MUELU_CREATE_EPETRA_PRECONDITIONER_HPP +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Epetra. + @ingroup MueLuAdapters + Given a EpetraCrs_Matrix, this function returns a constructed MueLu preconditioner. + @param[in] inA Matrix + @param[in] paramListIn Parameter list + */ +Teuchos::RCP +CreateEpetraPreconditioner(const Teuchos::RCP& inA, + // FIXME: why is it non-const + Teuchos::ParameterList& paramListIn); + +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Epetra. + @ingroup MueLuAdapters + Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. + @param[in] inA Matrix + @param[in] xmlFileName XML file containing MueLu options. 
+ */ +Teuchos::RCP +CreateEpetraPreconditioner(const Teuchos::RCP& A, + const std::string& xmlFileName); + +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Epetra. + @ingroup MueLuAdapters + Given a Epetra_CrsMatrix, this function returns a constructed MueLu preconditioner. + @param[in] inA Matrix + */ +Teuchos::RCP +CreateEpetraPreconditioner(const Teuchos::RCP& A, + const std::string& xmlFileName); + +void ReuseEpetraPreconditioner(const Teuchos::RCP& inA, MueLu::EpetraOperator& Op); + +} // namespace MueLu +#endif // HAVE_MUELU_SERIAL and HAVE_MUELU_EPETRA + +#endif // ifndef MUELU_CREATE_EPETRA_PRECONDITIONER_HPP diff --git a/packages/muelu/adapters/epetra/MueLu_EpetraOperator.cpp b/packages/muelu/adapters/epetra/MueLu_EpetraOperator.cpp index 0134537ab2e8..19e49b864c74 100644 --- a/packages/muelu/adapters/epetra/MueLu_EpetraOperator.cpp +++ b/packages/muelu/adapters/epetra/MueLu_EpetraOperator.cpp @@ -58,12 +58,12 @@ namespace MueLu { int EpetraOperator::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { try { // There is no rcpFromRef(const T&), so we need to do const_cast - const Xpetra::EpetraMultiVectorT eX(rcpFromRef(const_cast(X))); - Xpetra::EpetraMultiVectorT eY(rcpFromRef(Y)); + const Xpetra::EpetraMultiVectorT eX(rcpFromRef(const_cast(X))); + Xpetra::EpetraMultiVectorT eY(rcpFromRef(Y)); // Generally, we assume two different vectors, but AztecOO uses a single vector if (X.Values() == Y.Values()) { // X and Y point to the same memory, use an additional vector - RCP > tmpY = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(eY.getMap(), eY.getNumVectors())); + RCP> tmpY = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(eY.getMap(), eY.getNumVectors())); // InitialGuessIsZero in MueLu::Hierarchy.Iterate() does not zero out components, it // only assumes that user provided an already zeroed out vector bool initialGuessZero = true; @@ -83,72 +83,72 @@ int EpetraOperator::ApplyInverse(const Epetra_MultiVector& 
X, Epetra_MultiVector } } catch (std::exception& e) { - //TODO: error msg directly on std::cerr? + // TODO: error msg directly on std::cerr? std::cerr << "Caught an exception in MueLu::EpetraOperator::ApplyInverse():" << std::endl - << e.what() << std::endl; + << e.what() << std::endl; return -1; } return 0; } const Epetra_Comm& EpetraOperator::Comm() const { - RCP A = Hierarchy_->GetLevel(0)->Get >("A"); + RCP A = Hierarchy_->GetLevel(0)->Get>("A"); - //TODO: This code is not pretty - RCP > epbA = Teuchos::rcp_dynamic_cast >(A); + // TODO: This code is not pretty + RCP> epbA = Teuchos::rcp_dynamic_cast>(A); if (epbA != Teuchos::null) { - RCP > blockMat = epbA->getMatrix(0,0); - RCP > blockCrsWrap = Teuchos::rcp_dynamic_cast >(blockMat); + RCP> blockMat = epbA->getMatrix(0, 0); + RCP> blockCrsWrap = Teuchos::rcp_dynamic_cast>(blockMat); if (blockCrsWrap == Teuchos::null) throw Exceptions::BadCast("MueLu::EpetraOperator::Comm(): Cast from block (0,0) to CrsMatrixWrap failed. Could be a block matrix. 
TODO implement recursive support for block matrices."); - RCP> tmp_ECrsMtx = rcp_dynamic_cast >(blockCrsWrap->getCrsMatrix()); + RCP> tmp_ECrsMtx = rcp_dynamic_cast>(blockCrsWrap->getCrsMatrix()); if (tmp_ECrsMtx == Teuchos::null) throw Exceptions::BadCast("MueLu::EpetraOperator::Comm(): Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed"); RCP epA = tmp_ECrsMtx->getEpetra_CrsMatrixNonConst(); return epA->Comm(); } - RCP > crsOp = rcp_dynamic_cast >(A); + RCP> crsOp = rcp_dynamic_cast>(A); if (crsOp == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"); - const RCP> &tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); + const RCP>& tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); if (tmp_ECrsMtx == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed"); return tmp_ECrsMtx->getEpetra_CrsMatrixNonConst()->Comm(); } const Epetra_Map& EpetraOperator::OperatorDomainMap() const { - RCP > A = Hierarchy_->GetLevel(0)->Get >("A"); + RCP> A = Hierarchy_->GetLevel(0)->Get>("A"); - RCP > epbA = Teuchos::rcp_dynamic_cast >(A); + RCP> epbA = Teuchos::rcp_dynamic_cast>(A); if (epbA != Teuchos::null) - return Xpetra::toEpetra(epbA->getFullDomainMap()); // TODO check me + return Xpetra::toEpetra(epbA->getFullDomainMap()); // TODO check me - RCP > crsOp = rcp_dynamic_cast >(A); + RCP> crsOp = rcp_dynamic_cast>(A); if (crsOp == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"); - const RCP> &tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); + const RCP>& tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); if (tmp_ECrsMtx == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed"); return tmp_ECrsMtx->getEpetra_CrsMatrixNonConst()->DomainMap(); } -const Epetra_Map & EpetraOperator::OperatorRangeMap() const { - RCP > A = Hierarchy_->GetLevel(0)->Get >("A"); 
+const Epetra_Map& EpetraOperator::OperatorRangeMap() const { + RCP> A = Hierarchy_->GetLevel(0)->Get>("A"); - RCP > epbA = Teuchos::rcp_dynamic_cast >(A); + RCP> epbA = Teuchos::rcp_dynamic_cast>(A); if (epbA != Teuchos::null) return Xpetra::toEpetra(epbA->getFullRangeMap()); - RCP > crsOp = rcp_dynamic_cast >(A); + RCP> crsOp = rcp_dynamic_cast>(A); if (crsOp == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"); - const RCP> &tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); + const RCP>& tmp_ECrsMtx = rcp_dynamic_cast>(crsOp->getCrsMatrix()); if (tmp_ECrsMtx == Teuchos::null) throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed"); return tmp_ECrsMtx->getEpetra_CrsMatrixNonConst()->RangeMap(); } -} // namespace +} // namespace MueLu -#endif // #if defined(HAVE_MUELU_SERIAL) and defined(HAVE_MUELU_EPETRA) +#endif // #if defined(HAVE_MUELU_SERIAL) and defined(HAVE_MUELU_EPETRA) diff --git a/packages/muelu/adapters/epetra/MueLu_EpetraOperator.hpp b/packages/muelu/adapters/epetra/MueLu_EpetraOperator.hpp index e20f8650b2da..0b325cc5ffbb 100644 --- a/packages/muelu/adapters/epetra/MueLu_EpetraOperator.hpp +++ b/packages/muelu/adapters/epetra/MueLu_EpetraOperator.hpp @@ -50,7 +50,7 @@ #include #include "MueLu_Hierarchy.hpp" -//TODO: Kokkos headers +// TODO: Kokkos headers #if defined(HAVE_MUELU_SERIAL) and defined(HAVE_MUELU_EPETRA) @@ -60,108 +60,105 @@ namespace MueLu { @brief Turns a MueLu::Hierarchy into a Epetra_Operator. It allows MueLu to be used as a preconditioner for AztecOO (for instance). 
*/ - class EpetraOperator : public Epetra_Operator { - typedef double SC; - typedef int LO; - typedef int GO; - typedef Xpetra::EpetraNode NO; +class EpetraOperator : public Epetra_Operator { + typedef double SC; + typedef int LO; + typedef int GO; + typedef Xpetra::EpetraNode NO; - typedef Xpetra::Matrix Matrix; - typedef MueLu::Hierarchy Hierarchy; - typedef MueLu::Utilities Utils; + typedef Xpetra::Matrix Matrix; + typedef MueLu::Hierarchy Hierarchy; + typedef MueLu::Utilities Utils; - public: + public: + //! @name Constructor/Destructor + //@{ - //! @name Constructor/Destructor - //@{ + //! Constructor + EpetraOperator(const RCP& H) + : Hierarchy_(H) {} - //! Constructor - EpetraOperator(const RCP& H) : Hierarchy_(H) { } + //! Destructor. + virtual ~EpetraOperator() {} - //! Destructor. - virtual ~EpetraOperator() { } + //@} - //@} + int SetUseTranspose(bool /* UseTransposeBool */) { return -1; } - int SetUseTranspose(bool /* UseTransposeBool */) { return -1; } + //! @name Mathematical functions + //@{ - //! @name Mathematical functions - //@{ + //! Returns the result of a Epetra_Operator applied to a Epetra_MultiVector X in Y. + /*! + \param In + X - A Epetra_MultiVector of dimension NumVectors to multiply with matrix. + \param Out + Y -A Epetra_MultiVector of dimension NumVectors containing result. - //! Returns the result of a Epetra_Operator applied to a Epetra_MultiVector X in Y. - /*! - \param In - X - A Epetra_MultiVector of dimension NumVectors to multiply with matrix. - \param Out - Y -A Epetra_MultiVector of dimension NumVectors containing result. + \return Integer error code, set to 0 if successful. + */ + int Apply(const Epetra_MultiVector& /* X */, Epetra_MultiVector& /* Y */) const { return -1; } - \return Integer error code, set to 0 if successful. - */ - int Apply(const Epetra_MultiVector& /* X */, Epetra_MultiVector& /* Y */) const { return -1; } + //! Returns the result of a Epetra_Operator inverse applied to an Epetra_MultiVector X in Y. 
+ /*! + \param In + X - A Epetra_MultiVector of dimension NumVectors to solve for. + \param Out + Y -A Epetra_MultiVector of dimension NumVectors containing result. - //! Returns the result of a Epetra_Operator inverse applied to an Epetra_MultiVector X in Y. - /*! - \param In - X - A Epetra_MultiVector of dimension NumVectors to solve for. - \param Out - Y -A Epetra_MultiVector of dimension NumVectors containing result. + \return Integer error code, set to 0 if successful. - \return Integer error code, set to 0 if successful. + \warning In order to work with AztecOO, any implementation of this method must + support the case where X and Y are the same object. + */ + int ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const; - \warning In order to work with AztecOO, any implementation of this method must - support the case where X and Y are the same object. - */ - int ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const; + //! Returns the infinity norm of the global matrix. + /* Returns the quantity \f$ \| A \|_\infty\f$ such that + \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f]. - //! Returns the infinity norm of the global matrix. - /* Returns the quantity \f$ \| A \|_\infty\f$ such that - \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f]. + \warning This method must not be called unless HasNormInf() returns true. + */ + double NormInf() const { return 0; } + //@} - \warning This method must not be called unless HasNormInf() returns true. - */ - double NormInf() const { return 0; } - //@} + //! @name Attribute access functions + //@{ - //! @name Attribute access functions - //@{ + //! Returns a character string describing the operator + const char* Label() const { return "MueLu::Hierarchy"; } - //! Returns a character string describing the operator - const char * Label() const { return "MueLu::Hierarchy"; } + //! Returns the current UseTranspose setting. 
+ bool UseTranspose() const { return false; } - //! Returns the current UseTranspose setting. - bool UseTranspose() const { return false; } + //! Returns true if the \e this object can provide an approximate Inf-norm, false otherwise. + bool HasNormInf() const { return 0; } - //! Returns true if the \e this object can provide an approximate Inf-norm, false otherwise. - bool HasNormInf() const { return 0; } + //! Returns a pointer to the Epetra_Comm communicator associated with this operator. + const Epetra_Comm& Comm() const; - //! Returns a pointer to the Epetra_Comm communicator associated with this operator. - const Epetra_Comm & Comm() const; + //! Returns the Epetra_Map object associated with the domain of this operator. + const Epetra_Map& OperatorDomainMap() const; - //! Returns the Epetra_Map object associated with the domain of this operator. - const Epetra_Map & OperatorDomainMap() const; + //! Returns the Epetra_Map object associated with the range of this operator. + const Epetra_Map& OperatorRangeMap() const; - //! Returns the Epetra_Map object associated with the range of this operator. - const Epetra_Map & OperatorRangeMap() const; + //@} - //@} + //! @name MueLu specific + //@{ - //! @name MueLu specific - //@{ + //! Direct access to the underlying MueLu::Hierarchy. + RCP GetHierarchy() const { return Hierarchy_; } - //! Direct access to the underlying MueLu::Hierarchy. 
- RCP GetHierarchy() const { return Hierarchy_; } + //@} - //@} + private: + RCP Hierarchy_; +}; +} // namespace MueLu - private: +#endif // HAVE_MUELU_EPETRA and HAVE_MUELU_SERIAL - RCP Hierarchy_; - - }; - -} // namespace - -#endif // HAVE_MUELU_EPETRA and HAVE_MUELU_SERIAL - -#endif // MUELU_EPETRAOPERATOR_HPP +#endif // MUELU_EPETRAOPERATOR_HPP diff --git a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.cpp b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.cpp index f414dd518bee..399cd351599c 100644 --- a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.cpp +++ b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.cpp @@ -57,7 +57,7 @@ // Do explicit instantiation of MueLu::Details::LinearSolverFactory, // for Epetra objects. template class MueLu::Details::LinearSolverFactory; -#endif // HAVE_MUELU_EPETRA +#endif // HAVE_MUELU_EPETRA // Define typedefs that make the Tpetra macros work TPETRA_ETI_MANGLING_TYPEDEFS() @@ -65,8 +65,8 @@ TPETRA_ETI_MANGLING_TYPEDEFS() // Do explicit instantiation of MueLu::Details::LinearSolverFactory, for // Tpetra objects, for all combinations of Tpetra template parameters // for which Tpetra does explicit template instantiation (ETI). -TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR( MUELU_DETAILS_LINEARSOLVERFACTORY_INSTANT ) +TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR(MUELU_DETAILS_LINEARSOLVERFACTORY_INSTANT) // TODO amk: do we also have to do this for Xpetra? 
-#endif // HAVE_MUELU_EXPLICIT_INSTANTIATION \ No newline at end of file +#endif // HAVE_MUELU_EXPLICIT_INSTANTIATION \ No newline at end of file diff --git a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.hpp b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.hpp index ffe7875c07e6..2bc5ba10c4cf 100644 --- a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.hpp +++ b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory.hpp @@ -46,7 +46,7 @@ #include "MueLu_Details_LinearSolverFactory_decl.hpp" #ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION - #include "MueLu_Details_LinearSolverFactory_def.hpp" +#include "MueLu_Details_LinearSolverFactory_def.hpp" #endif -#endif // MUELU_DETAILS_LINEARSOLVERFACTORY_HPP \ No newline at end of file +#endif // MUELU_DETAILS_LINEARSOLVERFACTORY_HPP \ No newline at end of file diff --git a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_decl.hpp b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_decl.hpp index f74b4f341442..df25f6680749 100644 --- a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_decl.hpp +++ b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_decl.hpp @@ -54,58 +54,57 @@ namespace MueLu { namespace Details { - /// \class LinearSolverFactory - /// \brief Interface for a "factory" that creates MueLu solvers. +/// \class LinearSolverFactory +/// \brief Interface for a "factory" that creates MueLu solvers. +/// +/// \tparam MV Type of a (multi)vector, representing either the +/// solution(s) X or the right-hand side(s) B of a linear system +/// AX=B. For example, with Tpetra, use a Tpetra::MultiVector +/// specialization. A multivector is a single data structure +/// containing zero or more vectors with the same dimensions and +/// layout. 
+/// +/// \tparam OP Type of a matrix or linear operator that this Solver +/// understands. For example, for Tpetra, use a Tpetra::Operator +/// specialization. Always use the most abstract interface +/// possible; solvers should dynamic_cast to the subclass they +/// need. Also, be consistent: using different classes here +/// (e.g., Tpetra::RowMatrix instead of Tpetra::Operator) means +/// more expensive explicit template instantiation. +/// +/// \tparam NormType Type of the norm of a residual. +template +class LinearSolverFactory : public Trilinos::Details::LinearSolverFactory { + public: + /// \brief Get an instance of a MueLu solver. /// - /// \tparam MV Type of a (multi)vector, representing either the - /// solution(s) X or the right-hand side(s) B of a linear system - /// AX=B. For example, with Tpetra, use a Tpetra::MultiVector - /// specialization. A multivector is a single data structure - /// containing zero or more vectors with the same dimensions and - /// layout. + /// The solver is wrapped in a Trilinos::Details::LinearSolver + /// interface. /// - /// \tparam OP Type of a matrix or linear operator that this Solver - /// understands. For example, for Tpetra, use a Tpetra::Operator - /// specialization. Always use the most abstract interface - /// possible; solvers should dynamic_cast to the subclass they - /// need. Also, be consistent: using different classes here - /// (e.g., Tpetra::RowMatrix instead of Tpetra::Operator) means - /// more expensive explicit template instantiation. + /// \param solverName [in] The solver's name. Names are case + /// sensitive + /// \return A pointer to the solver, if the name was valid; else, + /// a null pointer (Teuchos::null). + virtual Teuchos::RCP > + getLinearSolver(const std::string& solverName); + + /// \brief Register this LinearSolverFactory with the central registry. + /// + /// Register this LinearSolverFactory with the central registry, for + /// the given SC, LO, GO, NT template parameters. 
This will let any + /// clients of Trilinos::Details::getLinearSolver create MueLu + /// solvers with those template parameters. /// - /// \tparam NormType Type of the norm of a residual. - template - class LinearSolverFactory : - public Trilinos::Details::LinearSolverFactory { - public: - /// \brief Get an instance of a MueLu solver. - /// - /// The solver is wrapped in a Trilinos::Details::LinearSolver - /// interface. - /// - /// \param solverName [in] The solver's name. Names are case - /// sensitive - /// \return A pointer to the solver, if the name was valid; else, - /// a null pointer (Teuchos::null). - virtual Teuchos::RCP > - getLinearSolver (const std::string& solverName); - - /// \brief Register this LinearSolverFactory with the central registry. - /// - /// Register this LinearSolverFactory with the central registry, for - /// the given SC, LO, GO, NT template parameters. This will let any - /// clients of Trilinos::Details::getLinearSolver create MueLu - /// solvers with those template parameters. - /// - /// You may call this function multiple times; it will only have an - /// effect the first time (it is idempotent). - /// - /// Users do not normally have to call this function. MueLu - /// automatically registers its LinearSolverFactory with the central - /// repository, for all enabled template parameter combinations. - static void registerLinearSolverFactory (); - }; - -} // namespace Details -} // namespace MueLu + /// You may call this function multiple times; it will only have an + /// effect the first time (it is idempotent). + /// + /// Users do not normally have to call this function. MueLu + /// automatically registers its LinearSolverFactory with the central + /// repository, for all enabled template parameter combinations. 
+ static void registerLinearSolverFactory(); +}; + +} // namespace Details +} // namespace MueLu -#endif // MUELU_DETAILS_LINEARSOLVERFACTORY_DECL_HPP +#endif // MUELU_DETAILS_LINEARSOLVERFACTORY_DECL_HPP diff --git a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_def.hpp b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_def.hpp index 39e707cb0938..0e8f151dbecf 100644 --- a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_def.hpp +++ b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_LinearSolverFactory_def.hpp @@ -54,169 +54,149 @@ #include #ifdef HAVE_MUELU_EPETRA -# include "Epetra_CrsMatrix.h" +#include "Epetra_CrsMatrix.h" #include "MueLu_CreateEpetraPreconditioner.hpp" -#endif // HAVE_MUELU_EPETRA +#endif // HAVE_MUELU_EPETRA -# include "Tpetra_Operator.hpp" -# include "MueLu_CreateTpetraPreconditioner.hpp" +#include "Tpetra_Operator.hpp" +#include "MueLu_CreateTpetraPreconditioner.hpp" namespace MueLu { namespace Details { -template -class LinearSolver : - public Trilinos::Details::LinearSolver, - virtual public Teuchos::Describable -{ - -public: - +template +class LinearSolver : public Trilinos::Details::LinearSolver, + virtual public Teuchos::Describable { + public: /// \brief Constructor. - LinearSolver () {} + LinearSolver() {} //! Destructor (virtual for memory safety). - virtual ~LinearSolver () {} + virtual ~LinearSolver() {} /// \brief Set the Solver's matrix. /// /// \param A [in] Pointer to the matrix A in the linear system(s) /// AX=B to solve. - void setMatrix (const Teuchos::RCP& A); + void setMatrix(const Teuchos::RCP& A); //! Get a pointer to this Solver's matrix. - Teuchos::RCP getMatrix () const { + Teuchos::RCP getMatrix() const { return A_; } //! Solve the linear system(s) AX=B. - void solve (MV& X, const MV& B); + void solve(MV& X, const MV& B); //! Set this solver's parameters. 
- void setParameters (const Teuchos::RCP& params); + void setParameters(const Teuchos::RCP& params); /// \brief Set up any part of the solve that depends on the /// structure of the input matrix, but not its numerical values. - void symbolic () {} + void symbolic() {} /// \brief Set up any part of the solve that depends on both the /// structure and the numerical values of the input matrix. - void numeric (); + void numeric(); //! Implementation of Teuchos::Describable::description. - std::string description () const; + std::string description() const; //! Implementation of Teuchos::Describable::describe. void - describe (Teuchos::FancyOStream& out, - const Teuchos::EVerbosityLevel verbLevel = - Teuchos::Describable::verbLevel_default) const; + describe(Teuchos::FancyOStream& out, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const; -private: + private: Teuchos::RCP A_; Teuchos::RCP params_; }; - + // Why does MueLu_EpetraOperator insist on HAVE_MUELU_SERIAL? #if defined(HAVE_MUELU_SERIAL) and defined(HAVE_MUELU_EPETRA) -template<> -class LinearSolver : - public Trilinos::Details::LinearSolver, - virtual public Teuchos::Describable -{ - -public: - +template <> +class LinearSolver : public Trilinos::Details::LinearSolver, + virtual public Teuchos::Describable { + public: /// \brief Constructor. - LinearSolver () : - changedA_(false), - changedParams_(false) - {} + LinearSolver() + : changedA_(false) + , changedParams_(false) {} //! Destructor (virtual for memory safety). - virtual ~LinearSolver () {} + virtual ~LinearSolver() {} /// \brief Set the Solver's matrix. /// /// \param A [in] Pointer to the matrix A in the linear system(s) /// AX=B to solve. 
- void setMatrix (const Teuchos::RCP& A) - { + void setMatrix(const Teuchos::RCP& A) { const char prefix[] = "MueLu::Details::LinearSolver::setMatrix: "; - - if(A != A_) - { - if(solver_ != Teuchos::null) + + if (A != A_) { + if (solver_ != Teuchos::null) changedA_ = true; - + A_ = rcp_dynamic_cast(A); - TEUCHOS_TEST_FOR_EXCEPTION - (A_.is_null(), std::runtime_error, prefix << "MueLu requires " - "an Epetra_CrsMatrix, but the matrix you provided is of a " - "different type. Please provide an Epetra_CrsMatrix instead."); + TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "MueLu requires " + "an Epetra_CrsMatrix, but the matrix you provided is of a " + "different type. Please provide an Epetra_CrsMatrix instead."); } } //! Get a pointer to this Solver's matrix. - Teuchos::RCP getMatrix () const { + Teuchos::RCP getMatrix() const { return A_; } //! Solve the linear system(s) AX=B. - void solve (Epetra_MultiVector& X, const Epetra_MultiVector& B) - { + void solve(Epetra_MultiVector& X, const Epetra_MultiVector& B) { // TODO amk: Do we assume the user has called numeric before solve, or should we call it for them? const char prefix[] = "MueLu::Details::LinearSolver::solve: "; - TEUCHOS_TEST_FOR_EXCEPTION - (solver_.is_null (), std::runtime_error, prefix << "The solver does not " - "exist yet. You must call numeric() before you may call this method."); - TEUCHOS_TEST_FOR_EXCEPTION - (changedA_, std::runtime_error, prefix << "The matrix A has been reset " - "since the last call to numeric(). Please call numeric() again."); - TEUCHOS_TEST_FOR_EXCEPTION - (changedParams_, std::runtime_error, prefix << "The parameters have been reset " - "since the last call to numeric(). Please call numeric() again."); - + TEUCHOS_TEST_FOR_EXCEPTION(solver_.is_null(), std::runtime_error, prefix << "The solver does not " + "exist yet. 
You must call numeric() before you may call this method."); + TEUCHOS_TEST_FOR_EXCEPTION(changedA_, std::runtime_error, prefix << "The matrix A has been reset " + "since the last call to numeric(). Please call numeric() again."); + TEUCHOS_TEST_FOR_EXCEPTION(changedParams_, std::runtime_error, prefix << "The parameters have been reset " + "since the last call to numeric(). Please call numeric() again."); + int err = solver_->ApplyInverse(B, X); - - TEUCHOS_TEST_FOR_EXCEPTION - (err != 0, std::runtime_error, prefix << "EpetraOperator::ApplyInverse returned " - "nonzero error code " << err); + + TEUCHOS_TEST_FOR_EXCEPTION(err != 0, std::runtime_error, prefix << "EpetraOperator::ApplyInverse returned " + "nonzero error code " + << err); } //! Set this solver's parameters. - void setParameters (const Teuchos::RCP& params) - { - if(solver_ != Teuchos::null && params != params_) + void setParameters(const Teuchos::RCP& params) { + if (solver_ != Teuchos::null && params != params_) changedParams_ = true; - + params_ = params; } /// \brief Set up any part of the solve that depends on the /// structure of the input matrix, but not its numerical values. - void symbolic () {} + void symbolic() {} /// \brief Set up any part of the solve that depends on both the /// structure and the numerical values of the input matrix. - void numeric () - { + void numeric() { const char prefix[] = "MueLu::Details::LinearSolver::numeric: "; - + // If the solver is up-to-date, leave it alone - if(solver_ == Teuchos::null || changedA_ || changedParams_) - { - changedA_ = false; + if (solver_ == Teuchos::null || changedA_ || changedParams_) { + changedA_ = false; changedParams_ = false; - - TEUCHOS_TEST_FOR_EXCEPTION - (A_ == Teuchos::null, std::runtime_error, prefix << "The matrix has not been " - "set yet. 
You must call setMatrix() with a nonnull matrix before you may " - "call this method."); - + + TEUCHOS_TEST_FOR_EXCEPTION(A_ == Teuchos::null, std::runtime_error, prefix << "The matrix has not been " + "set yet. You must call setMatrix() with a nonnull matrix before you may " + "call this method."); + // TODO: We should not have to cast away the constness here // TODO: See bug 6462 - if(params_ != Teuchos::null) + if (params_ != Teuchos::null) solver_ = CreateEpetraPreconditioner(rcp_const_cast(A_), *params_); else solver_ = CreateEpetraPreconditioner(rcp_const_cast(A_)); @@ -224,239 +204,209 @@ class LinearSolver : } //! Implementation of Teuchos::Describable::description. - std::string description () const - { + std::string description() const { if (solver_.is_null()) { return "\"MueLu::Details::LinearSolver\": {MV: Epetra_MultiVector, OP: Epetra_Operator, NormType: double}"; - } - else { - return solver_->GetHierarchy()->description (); + } else { + return solver_->GetHierarchy()->description(); } } //! Implementation of Teuchos::Describable::describe. 
void - describe (Teuchos::FancyOStream& out, - const Teuchos::EVerbosityLevel verbLevel = - Teuchos::Describable::verbLevel_default) const - { + describe(Teuchos::FancyOStream& out, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const { using std::endl; if (solver_.is_null()) { - if(verbLevel > Teuchos::VERB_NONE) { - Teuchos::OSTab tab0 (out); + if (verbLevel > Teuchos::VERB_NONE) { + Teuchos::OSTab tab0(out); out << "\"MueLu::Details::LinearSolver\":" << endl; - Teuchos::OSTab tab1 (out); + Teuchos::OSTab tab1(out); out << "MV: Epetra_MultiVector" << endl << "OP: Epetra_Operator" << endl << "NormType: double" << endl; } - } - else { - solver_->GetHierarchy()->describe (out, verbLevel); + } else { + solver_->GetHierarchy()->describe(out, verbLevel); } } -private: + private: Teuchos::RCP A_; Teuchos::RCP params_; Teuchos::RCP solver_; bool changedA_; bool changedParams_; }; -#endif // HAVE_MUELU_EPETRA - -template -class LinearSolver, - Tpetra::Operator, - typename Teuchos::ScalarTraits::magnitudeType> : - public Trilinos::Details::LinearSolver, - Tpetra::Operator, - typename Teuchos::ScalarTraits::magnitudeType>, - virtual public Teuchos::Describable -{ - -public: - +#endif // HAVE_MUELU_EPETRA + +template +class LinearSolver, + Tpetra::Operator, + typename Teuchos::ScalarTraits::magnitudeType> : public Trilinos::Details::LinearSolver, + Tpetra::Operator, + typename Teuchos::ScalarTraits::magnitudeType>, + virtual public Teuchos::Describable { + public: /// \brief Constructor. - LinearSolver () : - changedA_(false), - changedParams_(false) - {} + LinearSolver() + : changedA_(false) + , changedParams_(false) {} //! Destructor (virtual for memory safety). - virtual ~LinearSolver () {} + virtual ~LinearSolver() {} /// \brief Set the Solver's matrix. /// /// \param A [in] Pointer to the matrix A in the linear system(s) /// AX=B to solve. 
- void setMatrix (const Teuchos::RCP >& A) - { - if(A != A_) - { - if(solver_ != Teuchos::null) + void setMatrix(const Teuchos::RCP >& A) { + if (A != A_) { + if (solver_ != Teuchos::null) changedA_ = true; - + A_ = A; } } //! Get a pointer to this Solver's matrix. - Teuchos::RCP > getMatrix () const { + Teuchos::RCP > getMatrix() const { return A_; } //! Solve the linear system(s) AX=B. - void solve (Tpetra::MultiVector& X, const Tpetra::MultiVector& B) - { + void solve(Tpetra::MultiVector& X, const Tpetra::MultiVector& B) { // TODO amk: Do we assume the user has called numeric before solve, or should we call it for them? const char prefix[] = "MueLu::Details::LinearSolver::solve: "; - TEUCHOS_TEST_FOR_EXCEPTION - (solver_.is_null (), std::runtime_error, prefix << "The solver does not " - "exist yet. You must call numeric() before you may call this method."); - TEUCHOS_TEST_FOR_EXCEPTION - (changedA_, std::runtime_error, prefix << "The matrix A has been reset " - "since the last call to numeric(). Please call numeric() again."); - TEUCHOS_TEST_FOR_EXCEPTION - (changedParams_, std::runtime_error, prefix << "The parameters have been reset " - "since the last call to numeric(). Please call numeric() again."); - + TEUCHOS_TEST_FOR_EXCEPTION(solver_.is_null(), std::runtime_error, prefix << "The solver does not " + "exist yet. You must call numeric() before you may call this method."); + TEUCHOS_TEST_FOR_EXCEPTION(changedA_, std::runtime_error, prefix << "The matrix A has been reset " + "since the last call to numeric(). Please call numeric() again."); + TEUCHOS_TEST_FOR_EXCEPTION(changedParams_, std::runtime_error, prefix << "The parameters have been reset " + "since the last call to numeric(). Please call numeric() again."); + solver_->apply(B, X); } //! Set this solver's parameters. 
- void setParameters (const Teuchos::RCP& params) - { - if(solver_ != Teuchos::null && params != params_) + void setParameters(const Teuchos::RCP& params) { + if (solver_ != Teuchos::null && params != params_) changedParams_ = true; - + params_ = params; } /// \brief Set up any part of the solve that depends on the /// structure of the input matrix, but not its numerical values. - void symbolic () {} + void symbolic() {} /// \brief Set up any part of the solve that depends on both the /// structure and the numerical values of the input matrix. - void numeric () - { + void numeric() { const char prefix[] = "MueLu::Details::LinearSolver::numeric: "; - + // If the solver is up-to-date, leave it alone - if(solver_ == Teuchos::null || changedParams_) - { - TEUCHOS_TEST_FOR_EXCEPTION - (A_ == Teuchos::null, std::runtime_error, prefix << "The matrix has not been " - "set yet. You must call setMatrix() with a nonnull matrix before you may " - "call this method."); - + if (solver_ == Teuchos::null || changedParams_) { + TEUCHOS_TEST_FOR_EXCEPTION(A_ == Teuchos::null, std::runtime_error, prefix << "The matrix has not been " + "set yet. You must call setMatrix() with a nonnull matrix before you may " + "call this method."); + // TODO: We should not have to cast away the constness here // TODO: See bug 6462 - if(params_ != Teuchos::null) - solver_ = CreateTpetraPreconditioner(rcp_const_cast >(A_), *params_); + if (params_ != Teuchos::null) + solver_ = CreateTpetraPreconditioner(rcp_const_cast >(A_), *params_); else - solver_ = CreateTpetraPreconditioner(rcp_const_cast >(A_)); - } - else if(changedA_) - { - TEUCHOS_TEST_FOR_EXCEPTION - (A_ == Teuchos::null, std::runtime_error, prefix << "The matrix has not been " - "set yet. 
You must call setMatrix() with a nonnull matrix before you may " - "call this method."); - + solver_ = CreateTpetraPreconditioner(rcp_const_cast >(A_)); + } else if (changedA_) { + TEUCHOS_TEST_FOR_EXCEPTION(A_ == Teuchos::null, std::runtime_error, prefix << "The matrix has not been " + "set yet. You must call setMatrix() with a nonnull matrix before you may " + "call this method."); + // TODO: We should not have to cast away the constness here // TODO: See bug 6462 - RCP > helperMat; - helperMat = rcp_dynamic_cast >(A_); - TEUCHOS_TEST_FOR_EXCEPTION - (helperMat.is_null(), std::runtime_error, prefix << "MueLu requires " - "a Tpetra::CrsMatrix, but the matrix you provided is of a " - "different type. Please provide a Tpetra::CrsMatrix instead."); - ReuseTpetraPreconditioner(rcp_const_cast >(helperMat), *solver_); + RCP > helperMat; + helperMat = rcp_dynamic_cast >(A_); + TEUCHOS_TEST_FOR_EXCEPTION(helperMat.is_null(), std::runtime_error, prefix << "MueLu requires " + "a Tpetra::CrsMatrix, but the matrix you provided is of a " + "different type. Please provide a Tpetra::CrsMatrix instead."); + ReuseTpetraPreconditioner(rcp_const_cast >(helperMat), *solver_); } - - changedA_ = false; + + changedA_ = false; changedParams_ = false; } //! Implementation of Teuchos::Describable::description. - std::string description () const - { + std::string description() const { using Teuchos::TypeNameTraits; if (solver_.is_null()) { std::ostringstream os; os << "\"MueLu::Details::LinearSolver\": {" - << "MV: " << TypeNameTraits >::name() - << "OP: " << TypeNameTraits >::name() + << "MV: " << TypeNameTraits >::name() + << "OP: " << TypeNameTraits >::name() << "NormType: " << TypeNameTraits::magnitudeType>::name() << "}"; - return os.str (); - } - else { - return solver_->GetHierarchy()->description (); + return os.str(); + } else { + return solver_->GetHierarchy()->description(); } } //! Implementation of Teuchos::Describable::describe. 
void - describe (Teuchos::FancyOStream& out, - const Teuchos::EVerbosityLevel verbLevel = - Teuchos::Describable::verbLevel_default) const - { - using Teuchos::TypeNameTraits; + describe(Teuchos::FancyOStream& out, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const { using std::endl; + using Teuchos::TypeNameTraits; if (solver_.is_null()) { - if(verbLevel > Teuchos::VERB_NONE) { - Teuchos::OSTab tab0 (out); + if (verbLevel > Teuchos::VERB_NONE) { + Teuchos::OSTab tab0(out); out << "\"MueLu::Details::LinearSolver\":" << endl; - Teuchos::OSTab tab1 (out); - out << "MV: " << TypeNameTraits >::name() << endl - << "OP: " << TypeNameTraits >::name() << endl + Teuchos::OSTab tab1(out); + out << "MV: " << TypeNameTraits >::name() << endl + << "OP: " << TypeNameTraits >::name() << endl << "NormType: " << TypeNameTraits::magnitudeType>::name() << endl; } - } - else { - solver_->GetHierarchy()->describe (out, verbLevel); + } else { + solver_->GetHierarchy()->describe(out, verbLevel); } } -private: - Teuchos::RCP > A_; + private: + Teuchos::RCP > A_; Teuchos::RCP params_; - Teuchos::RCP > solver_; + Teuchos::RCP > solver_; bool changedA_; bool changedParams_; }; -template +template Teuchos::RCP > LinearSolverFactory:: -getLinearSolver (const std::string& solverName) -{ + getLinearSolver(const std::string& solverName) { using Teuchos::rcp; - return rcp (new MueLu::Details::LinearSolver ()); + return rcp(new MueLu::Details::LinearSolver()); } -template -void -LinearSolverFactory:: -registerLinearSolverFactory () -{ +template +void LinearSolverFactory:: + registerLinearSolverFactory() { #ifdef HAVE_TEUCHOSCORE_CXX11 typedef std::shared_ptr > ptr_type; - //typedef std::shared_ptr > base_ptr_type; + // typedef std::shared_ptr > base_ptr_type; #else typedef Teuchos::RCP > ptr_type; - //typedef Teuchos::RCP > base_ptr_type; -#endif // HAVE_TEUCHOSCORE_CXX11 + // typedef Teuchos::RCP > base_ptr_type; +#endif // HAVE_TEUCHOSCORE_CXX11 - 
ptr_type factory (new MueLu::Details::LinearSolverFactory ()); - Trilinos::Details::registerLinearSolverFactory ("MueLu", factory); + ptr_type factory(new MueLu::Details::LinearSolverFactory()); + Trilinos::Details::registerLinearSolverFactory("MueLu", factory); } -} // namespace Details -} // namespace MueLu +} // namespace Details +} // namespace MueLu // Macro for doing explicit instantiation of // MueLu::Details::LinearSolverFactory, for Tpetra objects, with @@ -465,9 +415,9 @@ registerLinearSolverFactory () // // We don't have to protect use of Tpetra objects here, or include // any header files for them, because this is a macro definition. -#define MUELU_DETAILS_LINEARSOLVERFACTORY_INSTANT(SC, LO, GO, NT) \ +#define MUELU_DETAILS_LINEARSOLVERFACTORY_INSTANT(SC, LO, GO, NT) \ template class MueLu::Details::LinearSolverFactory, \ - Tpetra::Operator, \ + Tpetra::Operator, \ typename Tpetra::MultiVector::mag_type>; -#endif // MUELU_DETAILS_LINEARSOLVERFACTORY_DEF_HPP +#endif // MUELU_DETAILS_LINEARSOLVERFACTORY_DEF_HPP diff --git a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.cpp b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.cpp index 0c3e228d69e4..de8cd6659de0 100644 --- a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.cpp +++ b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.cpp @@ -46,8 +46,8 @@ #include "Tpetra_MultiVector.hpp" #include "Tpetra_Operator.hpp" #ifdef HAVE_MUELU_EPETRA -# include "Epetra_MultiVector.h" -# include "Epetra_Operator.h" +#include "Epetra_MultiVector.h" +#include "Epetra_Operator.h" #endif #include "TpetraCore_ETIHelperMacros.h" @@ -66,29 +66,27 @@ TPETRA_ETI_MANGLING_TYPEDEFS() // call it LCL_CALL and not LCL_INST. We are just using the macros to // invoke this class method over the set of enabled template // parameters. 
-#define LCL_CALL( SC, LO, GO, NT ) \ +#define LCL_CALL(SC, LO, GO, NT) \ ::MueLu::Details::LinearSolverFactory, \ - Tpetra::Operator, \ - typename Tpetra::MultiVector::mag_type>::registerLinearSolverFactory (); + Tpetra::Operator, \ + typename Tpetra::MultiVector::mag_type>::registerLinearSolverFactory(); namespace MueLu { namespace Details { -void -registerLinearSolverFactory () -{ +void registerLinearSolverFactory() { // Fill in the body of the function with all the type-specific // run-time registration functions, for registering MueLu's // LinearSolverFactory with Tpetra objects. - TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR( LCL_CALL ) - + TPETRA_INSTANTIATE_SLGN_NO_ORDINAL_SCALAR(LCL_CALL) + // If Epetra is enabled in MueLu, also register MueLu's // LinearSolverFactory for Epetra objects. #ifdef HAVE_MUELU_EPETRA ::MueLu::Details::LinearSolverFactory::registerLinearSolverFactory (); -#endif // HAVE_MUELU_EPETRA + Epetra_Operator, double>::registerLinearSolverFactory(); +#endif // HAVE_MUELU_EPETRA } -} // namespace Details -} // namespace MueLu \ No newline at end of file +} // namespace Details +} // namespace MueLu \ No newline at end of file diff --git a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.hpp b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.hpp index 5732cf2f562f..26136f1338de 100644 --- a/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.hpp +++ b/packages/muelu/adapters/linear_solver_factory/MueLu_Details_registerLinearSolverFactory.hpp @@ -79,12 +79,12 @@ namespace Details { /// If you need to register MueLu's LinearSolverFactory for a set of /// template parameters that is not enabled, see /// MueLu_Details_LinearSolverFactory.hpp (in this directory). 
-void registerLinearSolverFactory (); +void registerLinearSolverFactory(); -} // namespace Details -} // namespace MueLu +} // namespace Details +} // namespace MueLu -namespace { // (anonymous) +namespace { // (anonymous) // \class RegisterLinearSolverFactory // \brief Register MueLu's solver factory/ies with the central registry. @@ -99,9 +99,9 @@ namespace { // (anonymous) // __attribute__((constructor)), without actually requiring // the syntax extension.) class RegisterLinearSolverFactory { -public: - RegisterLinearSolverFactory () { - MueLu::Details::registerLinearSolverFactory (); + public: + RegisterLinearSolverFactory() { + MueLu::Details::registerLinearSolverFactory(); } }; @@ -110,6 +110,6 @@ class RegisterLinearSolverFactory { // MueLu::Details::registerLinearSolverFactory(). RegisterLinearSolverFactory registerIt; -} // namespace (anonymous) +} // namespace -#endif // MUELU_DETAILS_REGISTERLINEARSOLVERFACTORY_HPP \ No newline at end of file +#endif // MUELU_DETAILS_REGISTERLINEARSOLVERFACTORY_HPP \ No newline at end of file diff --git a/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.cpp b/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.cpp index be6d55d36364..a60ea0d89092 100644 --- a/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.cpp +++ b/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.cpp @@ -77,4 +77,4 @@ namespace Stratimikos { #endif #endif -} // namespace Stratimikos +} // namespace Stratimikos diff --git a/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.hpp b/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.hpp index 080953dcc678..af6306f1c5ca 100644 --- a/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.hpp +++ b/packages/muelu/adapters/stratimikos/Stratimikos_MueLuHelpers.hpp @@ -67,71 +67,65 @@ namespace Stratimikos { - template - void enableMueLu(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") - { +template +void 
enableMueLu(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") { #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) - const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); + const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); - TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, - "Stratimikos::enableMueLu cannot add \"" + stratName +"\" because it is already included in builder!"); + TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, + "Stratimikos::enableMueLu cannot add \"" + stratName + "\" because it is already included in builder!"); - typedef Thyra::PreconditionerFactoryBase Base; - typedef Thyra::MueLuPreconditionerFactory Impl; + typedef Thyra::PreconditionerFactoryBase Base; + typedef Thyra::MueLuPreconditionerFactory Impl; - builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); + builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); #endif - } +} - template - MUELU_DEPRECATED void enableMueLu(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") - { - enableMueLu(builder,stratName); - } +template +MUELU_DEPRECATED void enableMueLu(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") { + enableMueLu(builder, stratName); +} - template - void enableMueLuRefMaxwell(LinearSolverBuilder& builder, const std::string& stratName = "MueLuRefMaxwell") - { +template +void enableMueLuRefMaxwell(LinearSolverBuilder& builder, const std::string& stratName = "MueLuRefMaxwell") { #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) - const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); + const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); - 
TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, - "Stratimikos::enableMueLuRefMaxwell cannot add \"" + stratName +"\" because it is already included in builder!"); + TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, + "Stratimikos::enableMueLuRefMaxwell cannot add \"" + stratName + "\" because it is already included in builder!"); - typedef Thyra::PreconditionerFactoryBase Base; - typedef Thyra::MueLuRefMaxwellPreconditionerFactory Impl; + typedef Thyra::PreconditionerFactoryBase Base; + typedef Thyra::MueLuRefMaxwellPreconditionerFactory Impl; - builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); + builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); #endif - } +} - template - MUELU_DEPRECATED void enableMueLuRefMaxwell(LinearSolverBuilder& builder, const std::string& stratName = "MueLuRefMaxwell") - { - enableMueLuRefMaxwell(builder,stratName); - } +template +MUELU_DEPRECATED void enableMueLuRefMaxwell(LinearSolverBuilder& builder, const std::string& stratName = "MueLuRefMaxwell") { + enableMueLuRefMaxwell(builder, stratName); +} - template - void enableMueLuMaxwell1(LinearSolverBuilder& builder, const std::string& stratName = "MueLuMaxwell1") - { +template +void enableMueLuMaxwell1(LinearSolverBuilder& builder, const std::string& stratName = "MueLuMaxwell1") { #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) - const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); + const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); - TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, - "Stratimikos::enableMueLuRefMaxwell cannot add \"" + stratName +"\" because it is already included in builder!"); + TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), 
std::logic_error, + "Stratimikos::enableMueLuRefMaxwell cannot add \"" + stratName + "\" because it is already included in builder!"); - typedef Thyra::PreconditionerFactoryBase Base; - typedef Thyra::MueLuMaxwell1PreconditionerFactory Impl; + typedef Thyra::PreconditionerFactoryBase Base; + typedef Thyra::MueLuMaxwell1PreconditionerFactory Impl; - builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); + builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); #endif - } +} - template - MUELU_DEPRECATED void enableMueLuMaxwell1(LinearSolverBuilder& builder, const std::string& stratName = "MueLuMaxwell1") - { - enableMueLuMaxwell1(builder,stratName); - } +template +MUELU_DEPRECATED void enableMueLuMaxwell1(LinearSolverBuilder& builder, const std::string& stratName = "MueLuMaxwell1") { + enableMueLuMaxwell1(builder, stratName); +} #if defined(HAVE_MUELU_EXPERIMENTAL) && defined(HAVE_MUELU_TEKO) #if 0 @@ -139,26 +133,25 @@ namespace Stratimikos { void enableMueLuTpetraQ2Q1(DefaultLinearSolverBuilder &builder, const std::string &stratName = "MueLu"); #endif - template - void enableMueLuTpetraQ2Q1(LinearSolverBuilder& builder, const std::string &stratName = "MueLu") { - const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); +template +void enableMueLuTpetraQ2Q1(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") { + const Teuchos::RCP precValidParams = Teuchos::sublist(builder.getValidParameters(), "Preconditioner Types"); - TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, - "Stratimikos::enableMueLuTpetraQ2Q1 cannot add \"" + stratName +"\" because it is already included in builder!"); + TEUCHOS_TEST_FOR_EXCEPTION(precValidParams->isParameter(stratName), std::logic_error, + "Stratimikos::enableMueLuTpetraQ2Q1 cannot add \"" + stratName + "\" because it is already included in builder!"); - typedef 
Thyra::PreconditionerFactoryBase Base; - typedef Thyra::MueLuTpetraQ2Q1PreconditionerFactory Impl; + typedef Thyra::PreconditionerFactoryBase Base; + typedef Thyra::MueLuTpetraQ2Q1PreconditionerFactory Impl; - builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); - } + builder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), stratName); +} - template - MUELU_DEPRECATED void enableMueLuTpetraQ2Q1(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") - { - enableMueLuTpetraQ2Q1(builder,stratName); - } +template +MUELU_DEPRECATED void enableMueLuTpetraQ2Q1(LinearSolverBuilder& builder, const std::string& stratName = "MueLu") { + enableMueLuTpetraQ2Q1(builder, stratName); +} #endif -} // namespace Stratimikos +} // namespace Stratimikos #endif diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_decl.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_decl.hpp index 7ba561cd8c68..139244813cbc 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_decl.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_decl.hpp @@ -57,77 +57,71 @@ #include "Thyra_XpetraLinearOp.hpp" #include - namespace Thyra { - /** @brief Concrete preconditioner factory subclass for Thyra based on MueLu. - @ingroup MueLuAdapters - Add support for MueLu's Maxwell1 preconditioner in Thyra. - */ - template - class MueLuMaxwell1PreconditionerFactory : public PreconditionerFactoryBase { - public: - - /** @name Constructors/initializers/accessors */ - //@{ - - /** \brief . */ - MueLuMaxwell1PreconditionerFactory(); - //@} - - /** @name Overridden from PreconditionerFactoryBase */ - //@{ - - /** \brief . */ - bool isCompatible(const LinearOpSourceBase& fwdOp) const; - /** \brief . */ - Teuchos::RCP > createPrec() const; - /** \brief . 
*/ - void initializePrec(const Teuchos::RCP >& fwdOp, - PreconditionerBase* prec, - const ESupportSolveUse supportSolveUse - ) const; - /** \brief . */ - void uninitializePrec(PreconditionerBase* prec, - Teuchos::RCP >* fwdOp, - ESupportSolveUse* supportSolveUse - ) const; - - //@} - - /** @name Overridden from Teuchos::ParameterListAcceptor */ - //@{ - - /** \brief . */ - void setParameterList(const Teuchos::RCP& paramList); - /** \brief . */ - Teuchos::RCP unsetParameterList(); - /** \brief . */ - Teuchos::RCP getNonconstParameterList(); - /** \brief . */ - Teuchos::RCP getParameterList() const; - /** \brief . */ - Teuchos::RCP getValidParameters() const; - //@} - - /** \name Public functions overridden from Describable. */ - //@{ - - /** \brief . */ - std::string description() const; - - // ToDo: Add an override of describe(...) to give more detail! - - //@} - - private: - - Teuchos::RCP paramList_; - - }; - -} // namespace Thyra - -#endif // #ifdef HAVE_MUELU_STRATIMIKOS - -#endif // THYRA_MUELU_MAXWELL1_PRECONDITIONER_FACTORY_DECL_HPP +/** @brief Concrete preconditioner factory subclass for Thyra based on MueLu. + @ingroup MueLuAdapters + Add support for MueLu's Maxwell1 preconditioner in Thyra. +*/ +template +class MueLuMaxwell1PreconditionerFactory : public PreconditionerFactoryBase { + public: + /** @name Constructors/initializers/accessors */ + //@{ + + /** \brief . */ + MueLuMaxwell1PreconditionerFactory(); + //@} + + /** @name Overridden from PreconditionerFactoryBase */ + //@{ + + /** \brief . */ + bool isCompatible(const LinearOpSourceBase& fwdOp) const; + /** \brief . */ + Teuchos::RCP > createPrec() const; + /** \brief . */ + void initializePrec(const Teuchos::RCP >& fwdOp, + PreconditionerBase* prec, + const ESupportSolveUse supportSolveUse) const; + /** \brief . 
*/ + void uninitializePrec(PreconditionerBase* prec, + Teuchos::RCP >* fwdOp, + ESupportSolveUse* supportSolveUse) const; + + //@} + + /** @name Overridden from Teuchos::ParameterListAcceptor */ + //@{ + + /** \brief . */ + void setParameterList(const Teuchos::RCP& paramList); + /** \brief . */ + Teuchos::RCP unsetParameterList(); + /** \brief . */ + Teuchos::RCP getNonconstParameterList(); + /** \brief . */ + Teuchos::RCP getParameterList() const; + /** \brief . */ + Teuchos::RCP getValidParameters() const; + //@} + + /** \name Public functions overridden from Describable. */ + //@{ + + /** \brief . */ + std::string description() const; + + // ToDo: Add an override of describe(...) to give more detail! + + //@} + + private: + Teuchos::RCP paramList_; +}; + +} // namespace Thyra + +#endif // #ifdef HAVE_MUELU_STRATIMIKOS + +#endif // THYRA_MUELU_MAXWELL1_PRECONDITIONER_FACTORY_DECL_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_def.hpp index 2fd17754fff1..1476bfc3e5d5 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_def.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuMaxwell1PreconditionerFactory_def.hpp @@ -55,276 +55,268 @@ #include #include - #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) // This is not as general as possible, but should be good enough for most builds. 
-#if((defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && !defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && !defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ - (!defined(HAVE_TPETRA_INST_DOUBLE) && !defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ - (defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))) -# define MUELU_CAN_USE_MIXED_PRECISION +#if ((defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && !defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && !defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ + (!defined(HAVE_TPETRA_INST_DOUBLE) && !defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ + (defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))) +#define MUELU_CAN_USE_MIXED_PRECISION #endif namespace Thyra { - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::ParameterList; - using Teuchos::rcp_dynamic_cast; - using Teuchos::rcp_const_cast; +using Teuchos::ParameterList; +using Teuchos::RCP; +using Teuchos::rcp; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; - // Constructors/initializers/accessors +// Constructors/initializers/accessors - template - MueLuMaxwell1PreconditionerFactory::MueLuMaxwell1PreconditionerFactory() : - paramList_(rcp(new ParameterList())) - {} +template +MueLuMaxwell1PreconditionerFactory::MueLuMaxwell1PreconditionerFactory() + : paramList_(rcp(new ParameterList())) {} - // Overridden from PreconditionerFactoryBase +// Overridden from PreconditionerFactoryBase - template - bool MueLuMaxwell1PreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { - const RCP > fwdOp = fwdOpSrc.getOp(); +template +bool 
MueLuMaxwell1PreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { + const RCP> fwdOp = fwdOpSrc.getOp(); - if (Xpetra::ThyraUtils::isTpetra(fwdOp)) return true; + if (Xpetra::ThyraUtils::isTpetra(fwdOp)) return true; #ifdef HAVE_MUELU_EPETRA - if (Xpetra::ThyraUtils::isEpetra(fwdOp)) return true; + if (Xpetra::ThyraUtils::isEpetra(fwdOp)) return true; #endif - return false; - } - - - template - RCP > MueLuMaxwell1PreconditionerFactory::createPrec() const { - return Teuchos::rcp(new DefaultPreconditioner); - } - - template - void MueLuMaxwell1PreconditionerFactory:: - initializePrec(const RCP >& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { - - // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...) - typedef Xpetra::Operator XpOp; - typedef Xpetra::ThyraUtils XpThyUtils; - typedef Xpetra::Matrix XpMat; - typedef Thyra::LinearOpBase ThyLinOpBase; - typedef Thyra::XpetraLinearOp ThyXpOp; + return false; +} + +template +RCP> MueLuMaxwell1PreconditionerFactory::createPrec() const { + return Teuchos::rcp(new DefaultPreconditioner); +} + +template +void MueLuMaxwell1PreconditionerFactory:: + initializePrec(const RCP>& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { + // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...) 
+ typedef Xpetra::Operator XpOp; + typedef Xpetra::ThyraUtils XpThyUtils; + typedef Xpetra::Matrix XpMat; + typedef Thyra::LinearOpBase ThyLinOpBase; + typedef Thyra::XpetraLinearOp ThyXpOp; #if defined(MUELU_CAN_USE_MIXED_PRECISION) - typedef Xpetra::TpetraHalfPrecisionOperator XpHalfPrecOp; - typedef Xpetra::MultiVector XpMV; - typedef typename XpHalfPrecOp::HalfScalar HalfScalar; - typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; - typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; - typedef Xpetra::MultiVector XphMV; - typedef Xpetra::MultiVector XpmMV; - typedef Xpetra::MultiVector XphmMV; - typedef Xpetra::Matrix XphMat; + typedef Xpetra::TpetraHalfPrecisionOperator XpHalfPrecOp; + typedef Xpetra::MultiVector XpMV; + typedef typename XpHalfPrecOp::HalfScalar HalfScalar; + typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; + typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; + typedef Xpetra::MultiVector XphMV; + typedef Xpetra::MultiVector XpmMV; + typedef Xpetra::MultiVector XphmMV; + typedef Xpetra::Matrix XphMat; #endif - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("ThyraMueLuMaxwell1::initializePrec"))); - - // Check precondition - TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); - TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); - TEUCHOS_ASSERT(prec); - - // Create a copy, as we may remove some things from the list - ParameterList paramList = *paramList_; - - // Retrieve wrapped concrete Xpetra matrix from FwdOp - const RCP fwdOp = fwdOpSrc->getOp(); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); - - // Check whether it is Epetra/Tpetra - bool bIsEpetra = XpThyUtils::isEpetra(fwdOp); - bool bIsTpetra = XpThyUtils::isTpetra(fwdOp); - TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true && bIsTpetra == true)); - - // wrap the forward operator as an Xpetra::Matrix that MueLu can work with - // MueLu needs a non-const object as input - RCP A = 
XpThyUtils::toXpetra(Teuchos::rcp_const_cast(fwdOp)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - // extract preconditioner operator - RCP thyra_precOp = Teuchos::null; - thyra_precOp = rcp_dynamic_cast >(defaultPrec->getNonconstUnspecifiedPrecOp(), true); - - // make a decision whether to (re)build the multigrid preconditioner or reuse the old one - // rebuild preconditioner if startingOver == true - // reuse preconditioner if startingOver == false - const bool startingOver = (thyra_precOp.is_null() || !paramList.isParameter("Maxwell1: enable reuse") || !paramList.get("Maxwell1: enable reuse")); - const bool useHalfPrecision = paramList.get("half precision", false) && bIsTpetra; - - RCP xpPrecOp; - if (startingOver == true) { - - // Convert to Xpetra - std::list convertXpetra = {"Coordinates", "Nullspace", "Kn", "D0"}; - for (auto it = convertXpetra.begin(); it != convertXpetra.end(); ++it) - Converters::replaceWithXpetra(paramList,*it); - - std::list sublists = {"maxwell1: 11list", "maxwell1: 22list"}; - for (auto itSublist = sublists.begin(); itSublist != sublists.end(); ++itSublist) + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("ThyraMueLuMaxwell1::initializePrec"))); + + // Check precondition + TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); + TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); + TEUCHOS_ASSERT(prec); + + // Create a copy, as we may remove some things from the list + ParameterList paramList = *paramList_; + + // Retrieve wrapped concrete Xpetra matrix from FwdOp + const RCP fwdOp = fwdOpSrc->getOp(); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); + + // Check whether it is Epetra/Tpetra + bool bIsEpetra = XpThyUtils::isEpetra(fwdOp); + bool bIsTpetra = XpThyUtils::isTpetra(fwdOp); + TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true && bIsTpetra 
== true)); + + // wrap the forward operator as an Xpetra::Matrix that MueLu can work with + // MueLu needs a non-const object as input + RCP A = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(fwdOp)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); + + // Retrieve concrete preconditioner object + const Teuchos::Ptr> defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); + + // extract preconditioner operator + RCP thyra_precOp = Teuchos::null; + thyra_precOp = rcp_dynamic_cast>(defaultPrec->getNonconstUnspecifiedPrecOp(), true); + + // make a decision whether to (re)build the multigrid preconditioner or reuse the old one + // rebuild preconditioner if startingOver == true + // reuse preconditioner if startingOver == false + const bool startingOver = (thyra_precOp.is_null() || !paramList.isParameter("Maxwell1: enable reuse") || !paramList.get("Maxwell1: enable reuse")); + const bool useHalfPrecision = paramList.get("half precision", false) && bIsTpetra; + + RCP xpPrecOp; + if (startingOver == true) { + // Convert to Xpetra + std::list convertXpetra = {"Coordinates", "Nullspace", "Kn", "D0"}; + for (auto it = convertXpetra.begin(); it != convertXpetra.end(); ++it) + Converters::replaceWithXpetra(paramList, *it); + + std::list sublists = {"maxwell1: 11list", "maxwell1: 22list"}; + for (auto itSublist = sublists.begin(); itSublist != sublists.end(); ++itSublist) if (paramList.isSublist(*itSublist)) { ParameterList& sublist = paramList.sublist(*itSublist); - for (int lvlNo=0; lvlNo < 10; ++lvlNo) { + for (int lvlNo = 0; lvlNo < 10; ++lvlNo) { if (sublist.isSublist("level " + std::to_string(lvlNo) + " user data")) { ParameterList& lvlList = sublist.sublist("level " + std::to_string(lvlNo) + " user data"); std::list convertKeys; for (auto it = lvlList.begin(); it != lvlList.end(); ++it) convertKeys.push_back(lvlList.name(it)); for (auto it = convertKeys.begin(); it != convertKeys.end(); ++it) - 
Converters::replaceWithXpetra(lvlList,*it); + Converters::replaceWithXpetra(lvlList, *it); } } } - ParameterList& sublist = paramList.sublist("maxwell1: 11list"); - if (sublist.isParameter("D0")) { - Converters::replaceWithXpetra(sublist,"D0"); - } + ParameterList& sublist = paramList.sublist("maxwell1: 11list"); + if (sublist.isParameter("D0")) { + Converters::replaceWithXpetra(sublist, "D0"); + } - paramList.set("Maxwell1: use as preconditioner", true); - if (useHalfPrecision) { + paramList.set("Maxwell1: use as preconditioner", true); + if (useHalfPrecision) { #if defined(MUELU_CAN_USE_MIXED_PRECISION) - // convert to half precision - RCP halfA = Xpetra::convertToHalfPrecision(A); - if (paramList.isType >("Coordinates")) { - RCP coords = paramList.get >("Coordinates"); - paramList.remove("Coordinates"); - RCP halfCoords = Xpetra::convertToHalfPrecision(coords); - paramList.set("Coordinates",halfCoords); - } - if (paramList.isType >("Nullspace")) { - RCP nullspace = paramList.get >("Nullspace"); - paramList.remove("Nullspace"); - RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); - paramList.set("Nullspace",halfNullspace); - } - std::list convertMat = {"Kn", "D0"}; - for (auto it = convertMat.begin(); it != convertMat.end(); ++it) { - if (paramList.isType >(*it)) { - RCP M = paramList.get >(*it); - paramList.remove(*it); - RCP halfM = Xpetra::convertToHalfPrecision(M); - paramList.set(*it,halfM); - } + // convert to half precision + RCP halfA = Xpetra::convertToHalfPrecision(A); + if (paramList.isType>("Coordinates")) { + RCP coords = paramList.get>("Coordinates"); + paramList.remove("Coordinates"); + RCP halfCoords = Xpetra::convertToHalfPrecision(coords); + paramList.set("Coordinates", halfCoords); + } + if (paramList.isType>("Nullspace")) { + RCP nullspace = paramList.get>("Nullspace"); + paramList.remove("Nullspace"); + RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); + paramList.set("Nullspace", halfNullspace); + } + std::list 
convertMat = {"Kn", "D0"}; + for (auto it = convertMat.begin(); it != convertMat.end(); ++it) { + if (paramList.isType>(*it)) { + RCP M = paramList.get>(*it); + paramList.remove(*it); + RCP halfM = Xpetra::convertToHalfPrecision(M); + paramList.set(*it, halfM); } + } - // build a new half-precision MueLu Maxwell1 preconditioner - RCP > halfPrec = rcp(new MueLu::Maxwell1(halfA, paramList, true)); - xpPrecOp = rcp(new XpHalfPrecOp(halfPrec)); + // build a new half-precision MueLu Maxwell1 preconditioner + RCP> halfPrec = rcp(new MueLu::Maxwell1(halfA, paramList, true)); + xpPrecOp = rcp(new XpHalfPrecOp(halfPrec)); #else - TEUCHOS_TEST_FOR_EXCEPT(true); + TEUCHOS_TEST_FOR_EXCEPT(true); #endif - } else - { - // build a new MueLu Maxwell1 preconditioner - RCP > preconditioner = rcp(new MueLu::Maxwell1(A, paramList, true)); - xpPrecOp = rcp_dynamic_cast(preconditioner); - } } else { - // reuse old MueLu preconditioner stored in MueLu Xpetra operator and put in new matrix + // build a new MueLu Maxwell1 preconditioner + RCP> preconditioner = rcp(new MueLu::Maxwell1(A, paramList, true)); + xpPrecOp = rcp_dynamic_cast(preconditioner); + } + } else { + // reuse old MueLu preconditioner stored in MueLu Xpetra operator and put in new matrix - RCP thyXpOp = rcp_dynamic_cast(thyra_precOp, true); - RCP xpOp = thyXpOp->getXpetraOperator(); + RCP thyXpOp = rcp_dynamic_cast(thyra_precOp, true); + RCP xpOp = thyXpOp->getXpetraOperator(); #if defined(MUELU_CAN_USE_MIXED_PRECISION) - RCP xpHalfPrecOp = rcp_dynamic_cast(xpOp); - if (!xpHalfPrecOp.is_null()) { - RCP > preconditioner = rcp_dynamic_cast>(xpHalfPrecOp->GetHalfPrecisionOperator(), true); - RCP halfA = Xpetra::convertToHalfPrecision(A); - preconditioner->resetMatrix(halfA); - xpPrecOp = rcp_dynamic_cast(preconditioner); - } else + RCP xpHalfPrecOp = rcp_dynamic_cast(xpOp); + if (!xpHalfPrecOp.is_null()) { + RCP> preconditioner = rcp_dynamic_cast>(xpHalfPrecOp->GetHalfPrecisionOperator(), true); + RCP halfA = 
Xpetra::convertToHalfPrecision(A); + preconditioner->resetMatrix(halfA); + xpPrecOp = rcp_dynamic_cast(preconditioner); + } else #endif - { - RCP > preconditioner = rcp_dynamic_cast>(xpOp, true); - preconditioner->resetMatrix(A); - xpPrecOp = rcp_dynamic_cast(preconditioner); - } + { + RCP> preconditioner = rcp_dynamic_cast>(xpOp, true); + preconditioner->resetMatrix(A); + xpPrecOp = rcp_dynamic_cast(preconditioner); } - - // wrap preconditioner in thyraPrecOp - RCP > thyraRangeSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getRangeMap()); - RCP > thyraDomainSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getDomainMap()); - - RCP thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace, xpPrecOp); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp)); - - defaultPrec->initializeUnspecified(thyraPrecOp); - - } - - template - void MueLuMaxwell1PreconditionerFactory:: - uninitializePrec(PreconditionerBase* prec, RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const { - TEUCHOS_ASSERT(prec); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - if (fwdOp) { - // TODO: Implement properly instead of returning default value - *fwdOp = Teuchos::null; - } - - if (supportSolveUse) { - // TODO: Implement properly instead of returning default value - *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; - } - - defaultPrec->uninitialize(); } + // wrap preconditioner in thyraPrecOp + RCP> thyraRangeSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getRangeMap()); + RCP> thyraDomainSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getDomainMap()); - // Overridden from ParameterListAcceptor - template - void MueLuMaxwell1PreconditionerFactory::setParameterList(RCP const& paramList) { - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); - paramList_ = paramList; - } + RCP thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace, 
xpPrecOp); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp)); - template - RCP MueLuMaxwell1PreconditionerFactory::getNonconstParameterList() { - return paramList_; - } + defaultPrec->initializeUnspecified(thyraPrecOp); +} - template - RCP MueLuMaxwell1PreconditionerFactory::unsetParameterList() { - RCP savedParamList = paramList_; - paramList_ = Teuchos::null; - return savedParamList; - } +template +void MueLuMaxwell1PreconditionerFactory:: + uninitializePrec(PreconditionerBase* prec, RCP>* fwdOp, ESupportSolveUse* supportSolveUse) const { + TEUCHOS_ASSERT(prec); - template - RCP MueLuMaxwell1PreconditionerFactory::getParameterList() const { - return paramList_; - } - - template - RCP MueLuMaxwell1PreconditionerFactory::getValidParameters() const { - static RCP validPL; + // Retrieve concrete preconditioner object + const Teuchos::Ptr> defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - if (Teuchos::is_null(validPL)) - validPL = rcp(new ParameterList()); - - return validPL; + if (fwdOp) { + // TODO: Implement properly instead of returning default value + *fwdOp = Teuchos::null; } - // Public functions overridden from Teuchos::Describable - template - std::string MueLuMaxwell1PreconditionerFactory::description() const { - return "Thyra::MueLuMaxwell1PreconditionerFactory"; + if (supportSolveUse) { + // TODO: Implement properly instead of returning default value + *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; } -} // namespace Thyra - -#endif // HAVE_MUELU_STRATIMIKOS -#endif // ifdef THYRA_MUELU_MAXWELL1_PRECONDITIONER_FACTORY_DEF_HPP + defaultPrec->uninitialize(); +} + +// Overridden from ParameterListAcceptor +template +void MueLuMaxwell1PreconditionerFactory::setParameterList(RCP const& paramList) { + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); + paramList_ = paramList; +} + +template +RCP MueLuMaxwell1PreconditionerFactory::getNonconstParameterList() { + return paramList_; +} + 
+template +RCP MueLuMaxwell1PreconditionerFactory::unsetParameterList() { + RCP savedParamList = paramList_; + paramList_ = Teuchos::null; + return savedParamList; +} + +template +RCP MueLuMaxwell1PreconditionerFactory::getParameterList() const { + return paramList_; +} + +template +RCP MueLuMaxwell1PreconditionerFactory::getValidParameters() const { + static RCP validPL; + + if (Teuchos::is_null(validPL)) + validPL = rcp(new ParameterList()); + + return validPL; +} + +// Public functions overridden from Teuchos::Describable +template +std::string MueLuMaxwell1PreconditionerFactory::description() const { + return "Thyra::MueLuMaxwell1PreconditionerFactory"; +} +} // namespace Thyra + +#endif // HAVE_MUELU_STRATIMIKOS + +#endif // ifdef THYRA_MUELU_MAXWELL1_PRECONDITIONER_FACTORY_DEF_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_decl.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_decl.hpp index 31b68376f2fa..4bc13452fc68 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_decl.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_decl.hpp @@ -78,7 +78,7 @@ #include #include #include -#include // todo fix me +#include // todo fix me #include #include #include @@ -94,92 +94,87 @@ namespace Thyra { - using Teuchos::RCP; - using Teuchos::rcp; +using Teuchos::RCP; +using Teuchos::rcp; - template - struct Converters { - static bool replaceWithXpetra(ParameterList& paramList, std::string parameterName); - }; +template +struct Converters { + static bool replaceWithXpetra(ParameterList& paramList, std::string parameterName); +}; #ifdef HAVE_MUELU_EPETRA - template - struct Converters { - static bool replaceWithXpetra(ParameterList& paramList, std::string parameterName); - }; +template +struct Converters { + static bool replaceWithXpetra(ParameterList& paramList, std::string parameterName); +}; #endif - /** @brief Concrete preconditioner factory subclass 
for Thyra based on MueLu. - @ingroup MueLuAdapters - Add support for MueLu preconditioners in Thyra. This class provides an interface both - for Epetra and Tpetra. - */ - template - class MueLuPreconditionerFactory : public PreconditionerFactoryBase { - public: - - /** @name Constructors/initializers/accessors */ - //@{ - - /** \brief . */ - MueLuPreconditionerFactory(); - //@} - - /** @name Overridden from PreconditionerFactoryBase */ - //@{ - - /** \brief . */ - bool isCompatible(const LinearOpSourceBase& fwdOp) const; - /** \brief . */ - Teuchos::RCP > createPrec() const; - /** \brief . */ - void initializePrec(const Teuchos::RCP >& fwdOp, - PreconditionerBase* prec, - const ESupportSolveUse supportSolveUse - ) const; - /** \brief . */ - void uninitializePrec(PreconditionerBase* prec, - Teuchos::RCP >* fwdOp, - ESupportSolveUse* supportSolveUse - ) const; - - //@} - - /** @name Overridden from Teuchos::ParameterListAcceptor */ - //@{ - - /** \brief . */ - void setParameterList(const Teuchos::RCP& paramList); - /** \brief . */ - Teuchos::RCP unsetParameterList(); - /** \brief . */ - Teuchos::RCP getNonconstParameterList(); - /** \brief . */ - Teuchos::RCP getParameterList() const; - /** \brief . */ - Teuchos::RCP getValidParameters() const; - //@} - - /** \name Public functions overridden from Describable. */ - //@{ - - /** \brief . */ - std::string description() const; - - // ToDo: Add an override of describe(...) to give more detail! - - //@} - - private: - - //Teuchos::RCP > CreateXpetraPreconditioner(Teuchos::RCP > op, const Teuchos::ParameterList& paramList, Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> > coords, Teuchos::RCP > nullspace) const; - - Teuchos::RCP paramList_; - - }; - -} // namespace Thyra - -#endif // #ifdef HAVE_MUELU_STRATIMIKOS - -#endif // THYRA_MUELU_PRECONDITIONER_FACTORY_DECL_HPP +/** @brief Concrete preconditioner factory subclass for Thyra based on MueLu. 
+ @ingroup MueLuAdapters + Add support for MueLu preconditioners in Thyra. This class provides an interface both + for Epetra and Tpetra. +*/ +template +class MueLuPreconditionerFactory : public PreconditionerFactoryBase { + public: + /** @name Constructors/initializers/accessors */ + //@{ + + /** \brief . */ + MueLuPreconditionerFactory(); + //@} + + /** @name Overridden from PreconditionerFactoryBase */ + //@{ + + /** \brief . */ + bool isCompatible(const LinearOpSourceBase& fwdOp) const; + /** \brief . */ + Teuchos::RCP > createPrec() const; + /** \brief . */ + void initializePrec(const Teuchos::RCP >& fwdOp, + PreconditionerBase* prec, + const ESupportSolveUse supportSolveUse) const; + /** \brief . */ + void uninitializePrec(PreconditionerBase* prec, + Teuchos::RCP >* fwdOp, + ESupportSolveUse* supportSolveUse) const; + + //@} + + /** @name Overridden from Teuchos::ParameterListAcceptor */ + //@{ + + /** \brief . */ + void setParameterList(const Teuchos::RCP& paramList); + /** \brief . */ + Teuchos::RCP unsetParameterList(); + /** \brief . */ + Teuchos::RCP getNonconstParameterList(); + /** \brief . */ + Teuchos::RCP getParameterList() const; + /** \brief . */ + Teuchos::RCP getValidParameters() const; + //@} + + /** \name Public functions overridden from Describable. */ + //@{ + + /** \brief . */ + std::string description() const; + + // ToDo: Add an override of describe(...) to give more detail! 
+ + //@} + + private: + // Teuchos::RCP > CreateXpetraPreconditioner(Teuchos::RCP > op, const Teuchos::ParameterList& paramList, Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> > coords, Teuchos::RCP > nullspace) const; + + Teuchos::RCP paramList_; +}; + +} // namespace Thyra + +#endif // #ifdef HAVE_MUELU_STRATIMIKOS + +#endif // THYRA_MUELU_PRECONDITIONER_FACTORY_DECL_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp index 7b541dc6924c..0213059d9e89 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuPreconditionerFactory_def.hpp @@ -52,695 +52,675 @@ #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) // This is not as general as possible, but should be good enough for most builds. -#if((defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && !defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && !defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ - (!defined(HAVE_TPETRA_INST_DOUBLE) && !defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ - (defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))) -# define MUELU_CAN_USE_MIXED_PRECISION +#if ((defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && !defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && !defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ + (!defined(HAVE_TPETRA_INST_DOUBLE) && !defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ + (defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))) +#define 
MUELU_CAN_USE_MIXED_PRECISION #endif namespace Thyra { - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::ParameterList; - using Teuchos::rcp_dynamic_cast; - using Teuchos::rcp_const_cast; - - - template - bool Converters::replaceWithXpetra(ParameterList& paramList, std::string parameterName) { - typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; - typedef Xpetra::Operator XpOp; - typedef Xpetra::ThyraUtils XpThyUtils; - // typedef Xpetra::CrsMatrixWrap XpCrsMatWrap; - // typedef Xpetra::CrsMatrix XpCrsMat; - typedef Xpetra::Matrix XpMat; - typedef Xpetra::MultiVector XpMultVec; - typedef Xpetra::MultiVector XpMagMultVec; - typedef Xpetra::Vector XpVec; - - typedef Thyra::LinearOpBase ThyLinOpBase; - typedef Thyra::DiagonalLinearOpBase ThyDiagLinOpBase; - // typedef Thyra::XpetraLinearOp ThyXpOp; - // typedef Thyra::SpmdVectorSpaceBase ThyVSBase; - - typedef Tpetra::CrsMatrix TpCrsMat; - typedef Tpetra::Operator tOp; - typedef Tpetra::Vector tV; - typedef Thyra::TpetraVector thyTpV; - typedef Tpetra::MultiVector tMV; - typedef Tpetra::MultiVector tMagMV; +using Teuchos::ParameterList; +using Teuchos::RCP; +using Teuchos::rcp; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; + +template +bool Converters::replaceWithXpetra(ParameterList& paramList, std::string parameterName) { + typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; + typedef Xpetra::Operator XpOp; + typedef Xpetra::ThyraUtils XpThyUtils; + // typedef Xpetra::CrsMatrixWrap XpCrsMatWrap; + // typedef Xpetra::CrsMatrix XpCrsMat; + typedef Xpetra::Matrix XpMat; + typedef Xpetra::MultiVector XpMultVec; + typedef Xpetra::MultiVector XpMagMultVec; + typedef Xpetra::Vector XpVec; + + typedef Thyra::LinearOpBase ThyLinOpBase; + typedef Thyra::DiagonalLinearOpBase ThyDiagLinOpBase; + // typedef Thyra::XpetraLinearOp ThyXpOp; + // typedef Thyra::SpmdVectorSpaceBase ThyVSBase; + + typedef Tpetra::CrsMatrix TpCrsMat; + typedef Tpetra::Operator tOp; + typedef 
Tpetra::Vector tV; + typedef Thyra::TpetraVector thyTpV; + typedef Tpetra::MultiVector tMV; + typedef Tpetra::MultiVector tMagMV; #if defined(MUELU_CAN_USE_MIXED_PRECISION) - typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; - typedef Tpetra::MultiVector tHalfMagMV; + typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; + typedef Tpetra::MultiVector tHalfMagMV; #endif - if (paramList.isParameter(parameterName)) { - if (paramList.isType >(parameterName)) - return true; - else if (paramList.isType >(parameterName)) { - RCP constM = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP M = rcp_const_cast(constM); - paramList.set >(parameterName, M); - return true; - } - else if (paramList.isType >(parameterName)) - return true; - else if (paramList.isType >(parameterName)) { - RCP constX = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = rcp_const_cast(constX); - paramList.set >(parameterName, X); - return true; - } - else if (paramList.isType >(parameterName)) - return true; - else if (paramList.isType >(parameterName)) { - RCP constX = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = rcp_const_cast(constX); - paramList.set >(parameterName, X); - return true; - } - else if (paramList.isType >(parameterName)) { - RCP tM = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP xM = MueLu::TpetraCrs_To_XpetraMatrix(tM); - paramList.set >(parameterName, xM); - return true; - } else if (paramList.isType >(parameterName)) { - RCP tpetra_X = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); - paramList.set >(parameterName, X); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); - return true; - } else if (paramList.isType >(parameterName)) { - RCP tpetra_X = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = 
MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); - paramList.set >(parameterName, X); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); - return true; - } + if (paramList.isParameter(parameterName)) { + if (paramList.isType >(parameterName)) + return true; + else if (paramList.isType >(parameterName)) { + RCP constM = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP M = rcp_const_cast(constM); + paramList.set >(parameterName, M); + return true; + } else if (paramList.isType >(parameterName)) + return true; + else if (paramList.isType >(parameterName)) { + RCP constX = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = rcp_const_cast(constX); + paramList.set >(parameterName, X); + return true; + } else if (paramList.isType >(parameterName)) + return true; + else if (paramList.isType >(parameterName)) { + RCP constX = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = rcp_const_cast(constX); + paramList.set >(parameterName, X); + return true; + } else if (paramList.isType >(parameterName)) { + RCP tM = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP xM = MueLu::TpetraCrs_To_XpetraMatrix(tM); + paramList.set >(parameterName, xM); + return true; + } else if (paramList.isType >(parameterName)) { + RCP tpetra_X = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); + paramList.set >(parameterName, X); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); + return true; + } else if (paramList.isType >(parameterName)) { + RCP tpetra_X = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); + paramList.set >(parameterName, X); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); + return true; + } #if defined(MUELU_CAN_USE_MIXED_PRECISION) - else if (paramList.isType >(parameterName)) { - RCP tpetra_hX = paramList.get 
>(parameterName); - paramList.remove(parameterName); - RCP tpetra_X = rcp(new tMagMV(tpetra_hX->getMap(),tpetra_hX->getNumVectors())); - Tpetra::deep_copy(*tpetra_X,*tpetra_hX); - RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); - paramList.set >(parameterName, X); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); - return true; - } + else if (paramList.isType >(parameterName)) { + RCP tpetra_hX = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP tpetra_X = rcp(new tMagMV(tpetra_hX->getMap(), tpetra_hX->getNumVectors())); + Tpetra::deep_copy(*tpetra_X, *tpetra_hX); + RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); + paramList.set >(parameterName, X); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); + return true; + } #endif - else if (paramList.isType >(parameterName) || - (paramList.isType >(parameterName) && ! - rcp_dynamic_cast(paramList.get >(parameterName)).is_null())) { - RCP thyM; - if (paramList.isType >(parameterName)) - thyM = paramList.get >(parameterName); - else - thyM = rcp_dynamic_cast(paramList.get >(parameterName), true); - paramList.remove(parameterName); - RCP > diag = thyM->getDiag(); - - RCP xpDiag; - if (!rcp_dynamic_cast(diag).is_null()) { - RCP tDiag = Thyra::TpetraOperatorVectorExtraction::getConstTpetraVector(diag); - if (!tDiag.is_null()) - xpDiag = Xpetra::toXpetra(tDiag); - } - TEUCHOS_ASSERT(!xpDiag.is_null()); - RCP M = Xpetra::MatrixFactory::Build(xpDiag); - paramList.set >(parameterName, M); - return true; + else if (paramList.isType >(parameterName) || + (paramList.isType >(parameterName) && !rcp_dynamic_cast(paramList.get >(parameterName)).is_null())) { + RCP thyM; + if (paramList.isType >(parameterName)) + thyM = paramList.get >(parameterName); + else + thyM = rcp_dynamic_cast(paramList.get >(parameterName), true); + paramList.remove(parameterName); + RCP > diag = thyM->getDiag(); + + RCP xpDiag; + if (!rcp_dynamic_cast(diag).is_null()) { + RCP tDiag = 
Thyra::TpetraOperatorVectorExtraction::getConstTpetraVector(diag); + if (!tDiag.is_null()) + xpDiag = Xpetra::toXpetra(tDiag); } - else if (paramList.isType >(parameterName)) { - RCP thyM = paramList.get >(parameterName); - paramList.remove(parameterName); - try { - RCP M = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(thyM)); - paramList.set >(parameterName, M); - } catch (std::exception& e) { - RCP M = XpThyUtils::toXpetraOperator(Teuchos::rcp_const_cast(thyM)); - RCP > tpOp = rcp_dynamic_cast >(M, true); - RCP tO = tpOp->getOperator(); - RCP diag; - if (tO->hasDiagonal()) { - diag = rcp(new tV(tO->getRangeMap())); - tO->getLocalDiagCopy(*diag); - } - auto fTpRow = rcp(new MueLu::TpetraOperatorAsRowMatrix(tO, diag)); - RCP > tpFOp = rcp(new Xpetra::TpetraOperator (fTpRow)); - auto op = rcp_dynamic_cast(tpFOp); - paramList.set >(parameterName, op); + TEUCHOS_ASSERT(!xpDiag.is_null()); + RCP M = Xpetra::MatrixFactory::Build(xpDiag); + paramList.set >(parameterName, M); + return true; + } else if (paramList.isType >(parameterName)) { + RCP thyM = paramList.get >(parameterName); + paramList.remove(parameterName); + try { + RCP M = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(thyM)); + paramList.set >(parameterName, M); + } catch (std::exception& e) { + RCP M = XpThyUtils::toXpetraOperator(Teuchos::rcp_const_cast(thyM)); + RCP > tpOp = rcp_dynamic_cast >(M, true); + RCP tO = tpOp->getOperator(); + RCP diag; + if (tO->hasDiagonal()) { + diag = rcp(new tV(tO->getRangeMap())); + tO->getLocalDiagCopy(*diag); } - return true; - } - else { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Parameter " << parameterName << " has wrong type."); - return false; + auto fTpRow = rcp(new MueLu::TpetraOperatorAsRowMatrix(tO, diag)); + RCP > tpFOp = rcp(new Xpetra::TpetraOperator(fTpRow)); + auto op = rcp_dynamic_cast(tpFOp); + paramList.set >(parameterName, op); } - } else + return true; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, 
"Parameter " << parameterName << " has wrong type."); return false; - } - + } + } else + return false; +} #ifdef HAVE_MUELU_EPETRA - template - bool Converters::replaceWithXpetra(ParameterList& paramList, std::string parameterName) { - typedef double Scalar; - typedef int LocalOrdinal; - typedef Tpetra::KokkosCompat::KokkosSerialWrapperNode Node; - typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; - typedef Xpetra::Operator XpOp; - typedef Xpetra::ThyraUtils XpThyUtils; - typedef Xpetra::CrsMatrixWrap XpCrsMatWrap; - typedef Xpetra::CrsMatrix XpCrsMat; - typedef Xpetra::Matrix XpMat; - typedef Xpetra::MultiVector XpMultVec; - typedef Xpetra::MultiVector XpMagMultVec; - typedef Xpetra::Vector XpVec; - - typedef Thyra::LinearOpBase ThyLinOpBase; - typedef Thyra::DiagonalLinearOpBase ThyDiagLinOpBase; - typedef Thyra::SpmdVectorSpaceBase ThyVSBase; - - typedef Tpetra::CrsMatrix TpCrsMat; - typedef Tpetra::Operator tOp; - typedef Tpetra::Vector tV; - typedef Thyra::TpetraVector thyTpV; - typedef Tpetra::MultiVector tMV; - typedef Tpetra::MultiVector tMagMV; +template +bool Converters::replaceWithXpetra(ParameterList& paramList, std::string parameterName) { + typedef double Scalar; + typedef int LocalOrdinal; + typedef Tpetra::KokkosCompat::KokkosSerialWrapperNode Node; + typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; + typedef Xpetra::Operator XpOp; + typedef Xpetra::ThyraUtils XpThyUtils; + typedef Xpetra::CrsMatrixWrap XpCrsMatWrap; + typedef Xpetra::CrsMatrix XpCrsMat; + typedef Xpetra::Matrix XpMat; + typedef Xpetra::MultiVector XpMultVec; + typedef Xpetra::MultiVector XpMagMultVec; + typedef Xpetra::Vector XpVec; + + typedef Thyra::LinearOpBase ThyLinOpBase; + typedef Thyra::DiagonalLinearOpBase ThyDiagLinOpBase; + typedef Thyra::SpmdVectorSpaceBase ThyVSBase; + + typedef Tpetra::CrsMatrix TpCrsMat; + typedef Tpetra::Operator tOp; + typedef Tpetra::Vector tV; + typedef Thyra::TpetraVector thyTpV; + typedef Tpetra::MultiVector tMV; 
+ typedef Tpetra::MultiVector tMagMV; #if defined(MUELU_CAN_USE_MIXED_PRECISION) - typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; - typedef Tpetra::MultiVector tHalfMagMV; + typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; + typedef Tpetra::MultiVector tHalfMagMV; #endif #if defined(HAVE_MUELU_EPETRA) - typedef Xpetra::EpetraCrsMatrixT XpEpCrsMat; + typedef Xpetra::EpetraCrsMatrixT XpEpCrsMat; #endif - if (paramList.isParameter(parameterName)) { - if (paramList.isType >(parameterName)) - return true; - else if (paramList.isType >(parameterName)) { - RCP constM = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP M = rcp_const_cast(constM); - paramList.set >(parameterName, M); - return true; - } - else if (paramList.isType >(parameterName)) - return true; - else if (paramList.isType >(parameterName)) { - RCP constX = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = rcp_const_cast(constX); - paramList.set >(parameterName, X); - return true; - } - else if (paramList.isType >(parameterName)) - return true; - else if (paramList.isType >(parameterName)) { - RCP constX = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = rcp_const_cast(constX); - paramList.set >(parameterName, X); - return true; - } - else if (paramList.isType >(parameterName)) { - RCP tM = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP xM = MueLu::TpetraCrs_To_XpetraMatrix(tM); - paramList.set >(parameterName, xM); - return true; - } else if (paramList.isType >(parameterName)) { - RCP tpetra_X = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); - paramList.set >(parameterName, X); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); - return true; - } else if (paramList.isType >(parameterName)) { - RCP tpetra_X = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP 
X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); - paramList.set >(parameterName, X); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); - return true; - } + if (paramList.isParameter(parameterName)) { + if (paramList.isType >(parameterName)) + return true; + else if (paramList.isType >(parameterName)) { + RCP constM = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP M = rcp_const_cast(constM); + paramList.set >(parameterName, M); + return true; + } else if (paramList.isType >(parameterName)) + return true; + else if (paramList.isType >(parameterName)) { + RCP constX = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = rcp_const_cast(constX); + paramList.set >(parameterName, X); + return true; + } else if (paramList.isType >(parameterName)) + return true; + else if (paramList.isType >(parameterName)) { + RCP constX = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = rcp_const_cast(constX); + paramList.set >(parameterName, X); + return true; + } else if (paramList.isType >(parameterName)) { + RCP tM = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP xM = MueLu::TpetraCrs_To_XpetraMatrix(tM); + paramList.set >(parameterName, xM); + return true; + } else if (paramList.isType >(parameterName)) { + RCP tpetra_X = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); + paramList.set >(parameterName, X); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); + return true; + } else if (paramList.isType >(parameterName)) { + RCP tpetra_X = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); + paramList.set >(parameterName, X); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); + return true; + } #if defined(MUELU_CAN_USE_MIXED_PRECISION) - else if (paramList.isType >(parameterName)) { - RCP tpetra_hX = paramList.get 
>(parameterName); - paramList.remove(parameterName); - RCP tpetra_X = rcp(new tMagMV(tpetra_hX->getMap(),tpetra_hX->getNumVectors())); - Tpetra::deep_copy(*tpetra_X,*tpetra_hX); - RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); - paramList.set >(parameterName, X); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); - return true; - } + else if (paramList.isType >(parameterName)) { + RCP tpetra_hX = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP tpetra_X = rcp(new tMagMV(tpetra_hX->getMap(), tpetra_hX->getNumVectors())); + Tpetra::deep_copy(*tpetra_X, *tpetra_hX); + RCP X = MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetra_X); + paramList.set >(parameterName, X); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(X)); + return true; + } #endif #ifdef HAVE_MUELU_EPETRA - else if (paramList.isType >(parameterName)) { - RCP eM = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP xeM = rcp(new XpEpCrsMat(eM)); - RCP xCrsM = rcp_dynamic_cast(xeM, true); - RCP xwM = rcp(new XpCrsMatWrap(xCrsM)); - RCP xM = rcp_dynamic_cast(xwM); - paramList.set >(parameterName, xM); - return true; - } else if (paramList.isType >(parameterName)) { - RCP epetra_X = Teuchos::null; - epetra_X = paramList.get >(parameterName); - paramList.remove(parameterName); - RCP > xpEpX = rcp(new Xpetra::EpetraMultiVectorT(epetra_X)); - RCP > xpEpXMult = rcp_dynamic_cast >(xpEpX, true); - RCP X = rcp_dynamic_cast(xpEpXMult, true); - paramList.set >(parameterName, X); - return true; - } + else if (paramList.isType >(parameterName)) { + RCP eM = paramList.get >(parameterName); + paramList.remove(parameterName); + RCP xeM = rcp(new XpEpCrsMat(eM)); + RCP xCrsM = rcp_dynamic_cast(xeM, true); + RCP xwM = rcp(new XpCrsMatWrap(xCrsM)); + RCP xM = rcp_dynamic_cast(xwM); + paramList.set >(parameterName, xM); + return true; + } else if (paramList.isType >(parameterName)) { + RCP epetra_X = Teuchos::null; + epetra_X = paramList.get >(parameterName); + 
paramList.remove(parameterName); + RCP > xpEpX = rcp(new Xpetra::EpetraMultiVectorT(epetra_X)); + RCP > xpEpXMult = rcp_dynamic_cast >(xpEpX, true); + RCP X = rcp_dynamic_cast(xpEpXMult, true); + paramList.set >(parameterName, X); + return true; + } #endif - else if (paramList.isType >(parameterName) || - (paramList.isType >(parameterName) && ! - rcp_dynamic_cast(paramList.get >(parameterName)).is_null())) { - RCP thyM; - if (paramList.isType >(parameterName)) - thyM = paramList.get >(parameterName); - else - thyM = rcp_dynamic_cast(paramList.get >(parameterName), true); - paramList.remove(parameterName); - RCP > diag = thyM->getDiag(); - - RCP xpDiag; - if (!rcp_dynamic_cast(diag).is_null()) { - RCP tDiag = Thyra::TpetraOperatorVectorExtraction::getConstTpetraVector(diag); - if (!tDiag.is_null()) - xpDiag = Xpetra::toXpetra(tDiag); - } + else if (paramList.isType >(parameterName) || + (paramList.isType >(parameterName) && !rcp_dynamic_cast(paramList.get >(parameterName)).is_null())) { + RCP thyM; + if (paramList.isType >(parameterName)) + thyM = paramList.get >(parameterName); + else + thyM = rcp_dynamic_cast(paramList.get >(parameterName), true); + paramList.remove(parameterName); + RCP > diag = thyM->getDiag(); + + RCP xpDiag; + if (!rcp_dynamic_cast(diag).is_null()) { + RCP tDiag = Thyra::TpetraOperatorVectorExtraction::getConstTpetraVector(diag); + if (!tDiag.is_null()) + xpDiag = Xpetra::toXpetra(tDiag); + } #ifdef HAVE_MUELU_EPETRA - if (xpDiag.is_null()) { - RCP comm = Thyra::get_Epetra_Comm(*rcp_dynamic_cast(thyM->range())->getComm()); - RCP map = Thyra::get_Epetra_Map(*(thyM->range()), comm); - if (!map.is_null()) { - RCP eDiag = Thyra::get_Epetra_Vector(*map, diag); - RCP nceDiag = rcp_const_cast(eDiag); - RCP > xpEpDiag = rcp(new Xpetra::EpetraVectorT(nceDiag)); - xpDiag = rcp_dynamic_cast(xpEpDiag, true); - } + if (xpDiag.is_null()) { + RCP comm = Thyra::get_Epetra_Comm(*rcp_dynamic_cast(thyM->range())->getComm()); + RCP map = 
Thyra::get_Epetra_Map(*(thyM->range()), comm); + if (!map.is_null()) { + RCP eDiag = Thyra::get_Epetra_Vector(*map, diag); + RCP nceDiag = rcp_const_cast(eDiag); + RCP > xpEpDiag = rcp(new Xpetra::EpetraVectorT(nceDiag)); + xpDiag = rcp_dynamic_cast(xpEpDiag, true); } + } #endif - TEUCHOS_ASSERT(!xpDiag.is_null()); - RCP M = Xpetra::MatrixFactory::Build(xpDiag); + TEUCHOS_ASSERT(!xpDiag.is_null()); + RCP M = Xpetra::MatrixFactory::Build(xpDiag); + paramList.set >(parameterName, M); + return true; + } else if (paramList.isType >(parameterName)) { + RCP thyM = paramList.get >(parameterName); + paramList.remove(parameterName); + try { + RCP M = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(thyM)); paramList.set >(parameterName, M); - return true; - } - else if (paramList.isType >(parameterName)) { - RCP thyM = paramList.get >(parameterName); - paramList.remove(parameterName); - try { - RCP M = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(thyM)); - paramList.set >(parameterName, M); - } catch (std::exception& e) { - RCP M = XpThyUtils::toXpetraOperator(Teuchos::rcp_const_cast(thyM)); - RCP > tpOp = rcp_dynamic_cast >(M, true); - RCP tO = tpOp->getOperator(); - RCP diag; - if (tO->hasDiagonal()) { - diag = rcp(new tV(tO->getRangeMap())); - tO->getLocalDiagCopy(*diag); - } - auto fTpRow = rcp(new MueLu::TpetraOperatorAsRowMatrix(tO, diag)); - RCP > tpFOp = rcp(new Xpetra::TpetraOperator (fTpRow)); - auto op = rcp_dynamic_cast(tpFOp); - paramList.set >(parameterName, op); + } catch (std::exception& e) { + RCP M = XpThyUtils::toXpetraOperator(Teuchos::rcp_const_cast(thyM)); + RCP > tpOp = rcp_dynamic_cast >(M, true); + RCP tO = tpOp->getOperator(); + RCP diag; + if (tO->hasDiagonal()) { + diag = rcp(new tV(tO->getRangeMap())); + tO->getLocalDiagCopy(*diag); } - return true; - } - else { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Parameter " << parameterName << " has wrong type."); - return false; + auto fTpRow = rcp(new 
MueLu::TpetraOperatorAsRowMatrix(tO, diag)); + RCP > tpFOp = rcp(new Xpetra::TpetraOperator(fTpRow)); + auto op = rcp_dynamic_cast(tpFOp); + paramList.set >(parameterName, op); } - } else + return true; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Parameter " << parameterName << " has wrong type."); return false; - } + } + } else + return false; +} #endif - // Constructors/initializers/accessors +// Constructors/initializers/accessors - template - MueLuPreconditionerFactory::MueLuPreconditionerFactory() : - paramList_(rcp(new ParameterList())) - {} +template +MueLuPreconditionerFactory::MueLuPreconditionerFactory() + : paramList_(rcp(new ParameterList())) {} - // Overridden from PreconditionerFactoryBase +// Overridden from PreconditionerFactoryBase - template - bool MueLuPreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { - const RCP > fwdOp = fwdOpSrc.getOp(); +template +bool MueLuPreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { + const RCP > fwdOp = fwdOpSrc.getOp(); - if (Xpetra::ThyraUtils::isTpetra(fwdOp)) return true; + if (Xpetra::ThyraUtils::isTpetra(fwdOp)) return true; #ifdef HAVE_MUELU_EPETRA - if (Xpetra::ThyraUtils::isEpetra(fwdOp)) return true; + if (Xpetra::ThyraUtils::isEpetra(fwdOp)) return true; #endif - if (Xpetra::ThyraUtils::isBlockedOperator(fwdOp)) return true; + if (Xpetra::ThyraUtils::isBlockedOperator(fwdOp)) return true; - return false; - } + return false; +} +template +RCP > MueLuPreconditionerFactory::createPrec() const { + return Teuchos::rcp(new DefaultPreconditioner); +} - template - RCP > MueLuPreconditionerFactory::createPrec() const { - return Teuchos::rcp(new DefaultPreconditioner); - } +template +void MueLuPreconditionerFactory:: + initializePrec(const RCP >& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { + Teuchos::TimeMonitor 
tM(*Teuchos::TimeMonitor::getNewTimer(std::string("ThyraMueLu::initializePrec"))); + + using Teuchos::rcp_dynamic_cast; - template - void MueLuPreconditionerFactory:: - initializePrec(const RCP >& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("ThyraMueLu::initializePrec"))); - - using Teuchos::rcp_dynamic_cast; - - // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...) - typedef Xpetra::Map XpMap; - typedef Xpetra::Operator XpOp; - typedef MueLu::XpetraOperator MueLuXpOp; - typedef Xpetra::ThyraUtils XpThyUtils; - // typedef Xpetra::CrsMatrix XpCrsMat; - typedef Xpetra::BlockedCrsMatrix XpBlockedCrsMat; - typedef Xpetra::Matrix XpMat; - // typedef Xpetra::MultiVector XpMultVec; - // typedef Xpetra::MultiVector::coordinateType,LocalOrdinal,GlobalOrdinal,Node> XpMultVecDouble; - typedef Thyra::LinearOpBase ThyLinOpBase; - typedef Thyra::XpetraLinearOp ThyXpOp; - typedef Xpetra::MultiVector XpMV; - typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; - typedef Xpetra::MultiVector XpmMV; + // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...) 
+ typedef Xpetra::Map XpMap; + typedef Xpetra::Operator XpOp; + typedef MueLu::XpetraOperator MueLuXpOp; + typedef Xpetra::ThyraUtils XpThyUtils; + // typedef Xpetra::CrsMatrix XpCrsMat; + typedef Xpetra::BlockedCrsMatrix XpBlockedCrsMat; + typedef Xpetra::Matrix XpMat; + // typedef Xpetra::MultiVector XpMultVec; + // typedef Xpetra::MultiVector::coordinateType,LocalOrdinal,GlobalOrdinal,Node> XpMultVecDouble; + typedef Thyra::LinearOpBase ThyLinOpBase; + typedef Thyra::XpetraLinearOp ThyXpOp; + typedef Xpetra::MultiVector XpMV; + typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; + typedef Xpetra::MultiVector XpmMV; #if defined(MUELU_CAN_USE_MIXED_PRECISION) - typedef Xpetra::TpetraHalfPrecisionOperator XpHalfPrecOp; - typedef typename XpHalfPrecOp::HalfScalar HalfScalar; - typedef Xpetra::Operator XpHalfOp; - typedef MueLu::XpetraOperator MueLuHalfXpOp; - typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; - typedef Xpetra::MultiVector XphMV; - typedef Xpetra::MultiVector XphmMV; - typedef Xpetra::Matrix XphMat; + typedef Xpetra::TpetraHalfPrecisionOperator XpHalfPrecOp; + typedef typename XpHalfPrecOp::HalfScalar HalfScalar; + typedef Xpetra::Operator XpHalfOp; + typedef MueLu::XpetraOperator MueLuHalfXpOp; + typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; + typedef Xpetra::MultiVector XphMV; + typedef Xpetra::MultiVector XphmMV; + typedef Xpetra::Matrix XphMat; #endif - - // Check precondition - TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); - TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); - TEUCHOS_ASSERT(prec); - - // Create a copy, as we may remove some things from the list - ParameterList paramList = *paramList_; - - // Retrieve wrapped concrete Xpetra matrix from FwdOp - const RCP fwdOp = fwdOpSrc->getOp(); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); - - // Check whether it is Epetra/Tpetra - bool bIsEpetra = XpThyUtils::isEpetra(fwdOp); - bool bIsTpetra = XpThyUtils::isTpetra(fwdOp); - bool 
bIsBlocked = XpThyUtils::isBlockedOperator(fwdOp); - TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true && bIsTpetra == true)); - TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == bIsTpetra) && bIsBlocked == false); - TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra != bIsTpetra) && bIsBlocked == true); - - RCP A = Teuchos::null; - if(bIsBlocked) { - Teuchos::RCP > ThyBlockedOp = - Teuchos::rcp_dynamic_cast >(fwdOp); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(ThyBlockedOp)); - - TEUCHOS_TEST_FOR_EXCEPT(ThyBlockedOp->blockExists(0,0)==false); - - Teuchos::RCP > b00 = ThyBlockedOp->getBlock(0,0); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(b00)); - - // wrap the forward operator as an Xpetra::Matrix that MueLu can work with - // MueLu needs a non-const object as input - RCP A00 = XpThyUtils::toXpetra(Teuchos::rcp_const_cast >(b00)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A00)); - - RCP rowmap00 = A00->getRowMap(); - RCP< const Teuchos::Comm< int > > comm = rowmap00->getComm(); - - // create a Xpetra::BlockedCrsMatrix which derives from Xpetra::Matrix that MueLu can work with - RCP bMat = Teuchos::rcp(new XpBlockedCrsMat(ThyBlockedOp, comm)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(bMat)); - - // save blocked matrix - A = bMat; - } else { - // wrap the forward operator as an Xpetra::Matrix that MueLu can work with - // MueLu needs a non-const object as input - A = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(fwdOp)); - } - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - // extract preconditioner operator - RCP thyra_precOp = Teuchos::null; - thyra_precOp = rcp_dynamic_cast >(defaultPrec->getNonconstUnspecifiedPrecOp(), true); - - // make a decision whether to (re)build the multigrid preconditioner or reuse the old one - // rebuild preconditioner if startingOver == true - // reuse preconditioner if startingOver == 
false - const bool startingOver = (thyra_precOp.is_null() || !paramList.isParameter("reuse: type") || paramList.get("reuse: type") == "none"); - bool useHalfPrecision = false; - if (paramList.isParameter("half precision")) - useHalfPrecision = paramList.get("half precision"); - else if (paramList.isSublist("Hierarchy") && paramList.sublist("Hierarchy").isParameter("half precision")) - useHalfPrecision = paramList.sublist("Hierarchy").get("half precision"); - if (useHalfPrecision) - TEUCHOS_TEST_FOR_EXCEPTION(!bIsTpetra, MueLu::Exceptions::RuntimeError, "The only scalar type Epetra knows is double, so a half precision preconditioner cannot be constructed."); - - RCP xpPrecOp; - if (startingOver == true) { - // Convert to Xpetra - std::list convertXpetra = {"Coordinates", "Nullspace"}; - for (auto it = convertXpetra.begin(); it != convertXpetra.end(); ++it) - Converters::replaceWithXpetra(paramList,*it); - - for (int lvlNo=0; lvlNo < 10; ++lvlNo) { - if (paramList.isSublist("level " + std::to_string(lvlNo) + " user data")) { - ParameterList& lvlList = paramList.sublist("level " + std::to_string(lvlNo) + " user data"); - std::list convertKeys; - for (auto it = lvlList.begin(); it != lvlList.end(); ++it) - convertKeys.push_back(lvlList.name(it)); - for (auto it = convertKeys.begin(); it != convertKeys.end(); ++it) - Converters::replaceWithXpetra(lvlList,*it); - } + // Check precondition + TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); + TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); + TEUCHOS_ASSERT(prec); + + // Create a copy, as we may remove some things from the list + ParameterList paramList = *paramList_; + + // Retrieve wrapped concrete Xpetra matrix from FwdOp + const RCP fwdOp = fwdOpSrc->getOp(); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); + + // Check whether it is Epetra/Tpetra + bool bIsEpetra = XpThyUtils::isEpetra(fwdOp); + bool bIsTpetra = XpThyUtils::isTpetra(fwdOp); + bool bIsBlocked = XpThyUtils::isBlockedOperator(fwdOp); + 
TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true && bIsTpetra == true)); + TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == bIsTpetra) && bIsBlocked == false); + TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra != bIsTpetra) && bIsBlocked == true); + + RCP A = Teuchos::null; + if (bIsBlocked) { + Teuchos::RCP > ThyBlockedOp = + Teuchos::rcp_dynamic_cast >(fwdOp); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(ThyBlockedOp)); + + TEUCHOS_TEST_FOR_EXCEPT(ThyBlockedOp->blockExists(0, 0) == false); + + Teuchos::RCP > b00 = ThyBlockedOp->getBlock(0, 0); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(b00)); + + // wrap the forward operator as an Xpetra::Matrix that MueLu can work with + // MueLu needs a non-const object as input + RCP A00 = XpThyUtils::toXpetra(Teuchos::rcp_const_cast >(b00)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A00)); + + RCP rowmap00 = A00->getRowMap(); + RCP > comm = rowmap00->getComm(); + + // create a Xpetra::BlockedCrsMatrix which derives from Xpetra::Matrix that MueLu can work with + RCP bMat = Teuchos::rcp(new XpBlockedCrsMat(ThyBlockedOp, comm)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(bMat)); + + // save blocked matrix + A = bMat; + } else { + // wrap the forward operator as an Xpetra::Matrix that MueLu can work with + // MueLu needs a non-const object as input + A = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(fwdOp)); + } + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); + + // Retrieve concrete preconditioner object + const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); + + // extract preconditioner operator + RCP thyra_precOp = Teuchos::null; + thyra_precOp = rcp_dynamic_cast >(defaultPrec->getNonconstUnspecifiedPrecOp(), true); + + // make a decision whether to (re)build the multigrid preconditioner or reuse the old one + // rebuild preconditioner if startingOver == true + // reuse preconditioner if startingOver == false + const bool startingOver = (thyra_precOp.is_null() || 
!paramList.isParameter("reuse: type") || paramList.get("reuse: type") == "none"); + bool useHalfPrecision = false; + if (paramList.isParameter("half precision")) + useHalfPrecision = paramList.get("half precision"); + else if (paramList.isSublist("Hierarchy") && paramList.sublist("Hierarchy").isParameter("half precision")) + useHalfPrecision = paramList.sublist("Hierarchy").get("half precision"); + if (useHalfPrecision) + TEUCHOS_TEST_FOR_EXCEPTION(!bIsTpetra, MueLu::Exceptions::RuntimeError, "The only scalar type Epetra knows is double, so a half precision preconditioner cannot be constructed."); + + RCP xpPrecOp; + if (startingOver == true) { + // Convert to Xpetra + std::list convertXpetra = {"Coordinates", "Nullspace"}; + for (auto it = convertXpetra.begin(); it != convertXpetra.end(); ++it) + Converters::replaceWithXpetra(paramList, *it); + + for (int lvlNo = 0; lvlNo < 10; ++lvlNo) { + if (paramList.isSublist("level " + std::to_string(lvlNo) + " user data")) { + ParameterList& lvlList = paramList.sublist("level " + std::to_string(lvlNo) + " user data"); + std::list convertKeys; + for (auto it = lvlList.begin(); it != lvlList.end(); ++it) + convertKeys.push_back(lvlList.name(it)); + for (auto it = convertKeys.begin(); it != convertKeys.end(); ++it) + Converters::replaceWithXpetra(lvlList, *it); } + } - if (useHalfPrecision) { + if (useHalfPrecision) { #if defined(MUELU_CAN_USE_MIXED_PRECISION) - // CAG: There is nothing special about the combination double-float, - // except that I feel somewhat confident that Trilinos builds - // with both scalar types. 
- - // convert to half precision - RCP halfA = Xpetra::convertToHalfPrecision(A); - const std::string userName = "user data"; - Teuchos::ParameterList& userParamList = paramList.sublist(userName); - if (userParamList.isType >("Coordinates")) { - RCP coords = userParamList.get >("Coordinates"); - userParamList.remove("Coordinates"); - RCP halfCoords = Xpetra::convertToHalfPrecision(coords); - userParamList.set("Coordinates",halfCoords); - } - if (userParamList.isType >("Nullspace")) { - RCP nullspace = userParamList.get >("Nullspace"); - userParamList.remove("Nullspace"); - RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); - userParamList.set("Nullspace",halfNullspace); - } - if (paramList.isType >("Coordinates")) { - RCP coords = paramList.get >("Coordinates"); - paramList.remove("Coordinates"); - RCP halfCoords = Xpetra::convertToHalfPrecision(coords); - userParamList.set("Coordinates",halfCoords); - } - if (paramList.isType >("Nullspace")) { - RCP nullspace = paramList.get >("Nullspace"); - paramList.remove("Nullspace"); - RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); - userParamList.set("Nullspace",halfNullspace); - } - + // CAG: There is nothing special about the combination double-float, + // except that I feel somewhat confident that Trilinos builds + // with both scalar types. 
+ + // convert to half precision + RCP halfA = Xpetra::convertToHalfPrecision(A); + const std::string userName = "user data"; + Teuchos::ParameterList& userParamList = paramList.sublist(userName); + if (userParamList.isType >("Coordinates")) { + RCP coords = userParamList.get >("Coordinates"); + userParamList.remove("Coordinates"); + RCP halfCoords = Xpetra::convertToHalfPrecision(coords); + userParamList.set("Coordinates", halfCoords); + } + if (userParamList.isType >("Nullspace")) { + RCP nullspace = userParamList.get >("Nullspace"); + userParamList.remove("Nullspace"); + RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); + userParamList.set("Nullspace", halfNullspace); + } + if (paramList.isType >("Coordinates")) { + RCP coords = paramList.get >("Coordinates"); + paramList.remove("Coordinates"); + RCP halfCoords = Xpetra::convertToHalfPrecision(coords); + userParamList.set("Coordinates", halfCoords); + } + if (paramList.isType >("Nullspace")) { + RCP nullspace = paramList.get >("Nullspace"); + paramList.remove("Nullspace"); + RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); + userParamList.set("Nullspace", halfNullspace); + } - // build a new half-precision MueLu preconditioner + // build a new half-precision MueLu preconditioner - RCP > H = MueLu::CreateXpetraPreconditioner(halfA, paramList); - RCP xpOp = rcp(new MueLuHalfXpOp(H)); - xpPrecOp = rcp(new XpHalfPrecOp(xpOp)); + RCP > H = MueLu::CreateXpetraPreconditioner(halfA, paramList); + RCP xpOp = rcp(new MueLuHalfXpOp(H)); + xpPrecOp = rcp(new XpHalfPrecOp(xpOp)); #else - TEUCHOS_TEST_FOR_EXCEPT(true); + TEUCHOS_TEST_FOR_EXCEPT(true); #endif - } else - { - const std::string userName = "user data"; - Teuchos::ParameterList& userParamList = paramList.sublist(userName); - if (paramList.isType >("Coordinates")) { - RCP coords = paramList.get >("Coordinates"); - paramList.remove("Coordinates"); - userParamList.set("Coordinates",coords); - } - if (paramList.isType >("Nullspace")) { - RCP 
nullspace = paramList.get >("Nullspace"); - paramList.remove("Nullspace"); - userParamList.set("Nullspace",nullspace); - } - - // build a new MueLu RefMaxwell preconditioner - RCP > H = MueLu::CreateXpetraPreconditioner(A, paramList); - xpPrecOp = rcp(new MueLuXpOp(H)); - } } else { - // reuse old MueLu hierarchy stored in MueLu Xpetra operator and put in new matrix - RCP thyXpOp = rcp_dynamic_cast(thyra_precOp, true); - xpPrecOp = rcp_dynamic_cast(thyXpOp->getXpetraOperator(), true); + const std::string userName = "user data"; + Teuchos::ParameterList& userParamList = paramList.sublist(userName); + if (paramList.isType >("Coordinates")) { + RCP coords = paramList.get >("Coordinates"); + paramList.remove("Coordinates"); + userParamList.set("Coordinates", coords); + } + if (paramList.isType >("Nullspace")) { + RCP nullspace = paramList.get >("Nullspace"); + paramList.remove("Nullspace"); + userParamList.set("Nullspace", nullspace); + } + + // build a new MueLu RefMaxwell preconditioner + RCP > H = MueLu::CreateXpetraPreconditioner(A, paramList); + xpPrecOp = rcp(new MueLuXpOp(H)); + } + } else { + // reuse old MueLu hierarchy stored in MueLu Xpetra operator and put in new matrix + RCP thyXpOp = rcp_dynamic_cast(thyra_precOp, true); + xpPrecOp = rcp_dynamic_cast(thyXpOp->getXpetraOperator(), true); #if defined(MUELU_CAN_USE_MIXED_PRECISION) - RCP xpHalfPrecOp = rcp_dynamic_cast(xpPrecOp); - if (!xpHalfPrecOp.is_null()) { - RCP > H = rcp_dynamic_cast(xpHalfPrecOp->GetHalfPrecisionOperator(), true)->GetHierarchy(); - RCP halfA = Xpetra::convertToHalfPrecision(A); - - TEUCHOS_TEST_FOR_EXCEPTION(!H->GetNumLevels(), MueLu::Exceptions::RuntimeError, - "Thyra::MueLuPreconditionerFactory: Hierarchy has no levels in it"); - TEUCHOS_TEST_FOR_EXCEPTION(!H->GetLevel(0)->IsAvailable("A"), MueLu::Exceptions::RuntimeError, - "Thyra::MueLuPreconditionerFactory: Hierarchy has no fine level operator"); - RCP level0 = H->GetLevel(0); - RCP O0 = level0->Get >("A"); - RCP A0 = 
rcp_dynamic_cast(O0, true); - - if (!A0.is_null()) { - // If a user provided a "number of equations" argument in a parameter list - // during the initial setup, we must honor that settings and reuse it for - // all consequent setups. - halfA->SetFixedBlockSize(A0->GetFixedBlockSize()); - } + RCP xpHalfPrecOp = rcp_dynamic_cast(xpPrecOp); + if (!xpHalfPrecOp.is_null()) { + RCP > H = rcp_dynamic_cast(xpHalfPrecOp->GetHalfPrecisionOperator(), true)->GetHierarchy(); + RCP halfA = Xpetra::convertToHalfPrecision(A); + + TEUCHOS_TEST_FOR_EXCEPTION(!H->GetNumLevels(), MueLu::Exceptions::RuntimeError, + "Thyra::MueLuPreconditionerFactory: Hierarchy has no levels in it"); + TEUCHOS_TEST_FOR_EXCEPTION(!H->GetLevel(0)->IsAvailable("A"), MueLu::Exceptions::RuntimeError, + "Thyra::MueLuPreconditionerFactory: Hierarchy has no fine level operator"); + RCP level0 = H->GetLevel(0); + RCP O0 = level0->Get >("A"); + RCP A0 = rcp_dynamic_cast(O0, true); + + if (!A0.is_null()) { + // If a user provided a "number of equations" argument in a parameter list + // during the initial setup, we must honor that settings and reuse it for + // all consequent setups. 
+ halfA->SetFixedBlockSize(A0->GetFixedBlockSize()); + } - // set new matrix - level0->Set("A", halfA); + // set new matrix + level0->Set("A", halfA); - H->SetupRe(); - } else + H->SetupRe(); + } else #endif - { - // get old MueLu hierarchy - RCP xpOp = rcp_dynamic_cast(thyXpOp->getXpetraOperator(), true); - RCP > H = xpOp->GetHierarchy();; - - TEUCHOS_TEST_FOR_EXCEPTION(!H->GetNumLevels(), MueLu::Exceptions::RuntimeError, - "Thyra::MueLuPreconditionerFactory: Hierarchy has no levels in it"); - TEUCHOS_TEST_FOR_EXCEPTION(!H->GetLevel(0)->IsAvailable("A"), MueLu::Exceptions::RuntimeError, - "Thyra::MueLuPreconditionerFactory: Hierarchy has no fine level operator"); - RCP level0 = H->GetLevel(0); - RCP O0 = level0->Get >("A"); - RCP A0 = rcp_dynamic_cast(O0); - - if (!A0.is_null()) { - // If a user provided a "number of equations" argument in a parameter list - // during the initial setup, we must honor that settings and reuse it for - // all consequent setups. - A->SetFixedBlockSize(A0->GetFixedBlockSize()); - } - - // set new matrix - level0->Set("A", A); - - H->SetupRe(); + { + // get old MueLu hierarchy + RCP xpOp = rcp_dynamic_cast(thyXpOp->getXpetraOperator(), true); + RCP > H = xpOp->GetHierarchy(); + ; + + TEUCHOS_TEST_FOR_EXCEPTION(!H->GetNumLevels(), MueLu::Exceptions::RuntimeError, + "Thyra::MueLuPreconditionerFactory: Hierarchy has no levels in it"); + TEUCHOS_TEST_FOR_EXCEPTION(!H->GetLevel(0)->IsAvailable("A"), MueLu::Exceptions::RuntimeError, + "Thyra::MueLuPreconditionerFactory: Hierarchy has no fine level operator"); + RCP level0 = H->GetLevel(0); + RCP O0 = level0->Get >("A"); + RCP A0 = rcp_dynamic_cast(O0); + + if (!A0.is_null()) { + // If a user provided a "number of equations" argument in a parameter list + // during the initial setup, we must honor that settings and reuse it for + // all consequent setups. 
+ A->SetFixedBlockSize(A0->GetFixedBlockSize()); } - } - - // wrap preconditioner in thyraPrecOp - RCP > thyraRangeSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getRangeMap()); - RCP > thyraDomainSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getDomainMap()); - - RCP thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace, xpPrecOp); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp)); - - defaultPrec->initializeUnspecified(thyraPrecOp); - - } - - template - void MueLuPreconditionerFactory:: - uninitializePrec(PreconditionerBase* prec, RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const { - TEUCHOS_ASSERT(prec); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - if (fwdOp) { - // TODO: Implement properly instead of returning default value - *fwdOp = Teuchos::null; - } + // set new matrix + level0->Set("A", A); - if (supportSolveUse) { - // TODO: Implement properly instead of returning default value - *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; + H->SetupRe(); } - - defaultPrec->uninitialize(); - } - - - // Overridden from ParameterListAcceptor - template - void MueLuPreconditionerFactory::setParameterList(RCP const& paramList) { - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); - paramList_ = paramList; } - template - RCP MueLuPreconditionerFactory::getNonconstParameterList() { - return paramList_; - } + // wrap preconditioner in thyraPrecOp + RCP > thyraRangeSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getRangeMap()); + RCP > thyraDomainSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getDomainMap()); - template - RCP MueLuPreconditionerFactory::unsetParameterList() { - RCP savedParamList = paramList_; - paramList_ = Teuchos::null; - return savedParamList; - } + RCP thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace, xpPrecOp); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp)); 
- template - RCP MueLuPreconditionerFactory::getParameterList() const { - return paramList_; - } + defaultPrec->initializeUnspecified(thyraPrecOp); +} - template - RCP MueLuPreconditionerFactory::getValidParameters() const { - static RCP validPL; +template +void MueLuPreconditionerFactory:: + uninitializePrec(PreconditionerBase* prec, RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const { + TEUCHOS_ASSERT(prec); - if (Teuchos::is_null(validPL)) - validPL = rcp(new ParameterList()); + // Retrieve concrete preconditioner object + const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - return validPL; + if (fwdOp) { + // TODO: Implement properly instead of returning default value + *fwdOp = Teuchos::null; } - // Public functions overridden from Teuchos::Describable - template - std::string MueLuPreconditionerFactory::description() const { - return "Thyra::MueLuPreconditionerFactory"; + if (supportSolveUse) { + // TODO: Implement properly instead of returning default value + *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; } -} // namespace Thyra - -#endif // HAVE_MUELU_STRATIMIKOS -#endif // ifdef THYRA_MUELU_PRECONDITIONER_FACTORY_DEF_HPP + defaultPrec->uninitialize(); +} + +// Overridden from ParameterListAcceptor +template +void MueLuPreconditionerFactory::setParameterList(RCP const& paramList) { + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); + paramList_ = paramList; +} + +template +RCP MueLuPreconditionerFactory::getNonconstParameterList() { + return paramList_; +} + +template +RCP MueLuPreconditionerFactory::unsetParameterList() { + RCP savedParamList = paramList_; + paramList_ = Teuchos::null; + return savedParamList; +} + +template +RCP MueLuPreconditionerFactory::getParameterList() const { + return paramList_; +} + +template +RCP MueLuPreconditionerFactory::getValidParameters() const { + static RCP validPL; + + if (Teuchos::is_null(validPL)) + validPL = rcp(new 
ParameterList()); + + return validPL; +} + +// Public functions overridden from Teuchos::Describable +template +std::string MueLuPreconditionerFactory::description() const { + return "Thyra::MueLuPreconditionerFactory"; +} +} // namespace Thyra + +#endif // HAVE_MUELU_STRATIMIKOS + +#endif // ifdef THYRA_MUELU_PRECONDITIONER_FACTORY_DEF_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_decl.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_decl.hpp index 46702974d0bc..e158803bf6ba 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_decl.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_decl.hpp @@ -74,7 +74,7 @@ #include #include -#include // todo fix me +#include // todo fix me #include #include #include @@ -92,78 +92,73 @@ namespace Thyra { - /** @brief Concrete preconditioner factory subclass for Thyra based on MueLu. - @ingroup MueLuAdapters - Add support for MueLu preconditioners in Thyra. This class provides an interface both - for Epetra and Tpetra. - - The general implementation only handles Tpetra. For Epetra there is a specialization - on SC=double, LO=int, GO=int and NO=EpetraNode. - */ - template - class MueLuRefMaxwellPreconditionerFactory : public PreconditionerFactoryBase { - public: - - /** @name Constructors/initializers/accessors */ - //@{ - - /** \brief . */ - MueLuRefMaxwellPreconditionerFactory(); - //@} - - /** @name Overridden from PreconditionerFactoryBase */ - //@{ - - /** \brief . */ - bool isCompatible(const LinearOpSourceBase& fwdOp) const; - /** \brief . */ - Teuchos::RCP > createPrec() const; - /** \brief . */ - void initializePrec(const Teuchos::RCP >& fwdOp, - PreconditionerBase* prec, - const ESupportSolveUse supportSolveUse - ) const; - /** \brief . 
*/ - void uninitializePrec(PreconditionerBase* prec, - Teuchos::RCP >* fwdOp, - ESupportSolveUse* supportSolveUse - ) const; - - //@} - - /** @name Overridden from Teuchos::ParameterListAcceptor */ - //@{ - - /** \brief . */ - void setParameterList(const Teuchos::RCP& paramList); - /** \brief . */ - Teuchos::RCP unsetParameterList(); - /** \brief . */ - Teuchos::RCP getNonconstParameterList(); - /** \brief . */ - Teuchos::RCP getParameterList() const; - /** \brief . */ - Teuchos::RCP getValidParameters() const; - //@} - - /** \name Public functions overridden from Describable. */ - //@{ - - /** \brief . */ - std::string description() const; - - // ToDo: Add an override of describe(...) to give more detail! - - //@} - - private: - - Teuchos::RCP paramList_; - - }; - -} // namespace Thyra - -#endif // #ifdef HAVE_MUELU_STRATIMIKOS - -#endif // THYRA_MUELU_REFMAXWELL_PRECONDITIONER_FACTORY_DECL_HPP +/** @brief Concrete preconditioner factory subclass for Thyra based on MueLu. + @ingroup MueLuAdapters + Add support for MueLu preconditioners in Thyra. This class provides an interface both + for Epetra and Tpetra. + + The general implementation only handles Tpetra. For Epetra there is a specialization + on SC=double, LO=int, GO=int and NO=EpetraNode. +*/ +template +class MueLuRefMaxwellPreconditionerFactory : public PreconditionerFactoryBase { + public: + /** @name Constructors/initializers/accessors */ + //@{ + + /** \brief . */ + MueLuRefMaxwellPreconditionerFactory(); + //@} + + /** @name Overridden from PreconditionerFactoryBase */ + //@{ + + /** \brief . */ + bool isCompatible(const LinearOpSourceBase& fwdOp) const; + /** \brief . */ + Teuchos::RCP > createPrec() const; + /** \brief . */ + void initializePrec(const Teuchos::RCP >& fwdOp, + PreconditionerBase* prec, + const ESupportSolveUse supportSolveUse) const; + /** \brief . 
*/ + void uninitializePrec(PreconditionerBase* prec, + Teuchos::RCP >* fwdOp, + ESupportSolveUse* supportSolveUse) const; + + //@} + + /** @name Overridden from Teuchos::ParameterListAcceptor */ + //@{ + + /** \brief . */ + void setParameterList(const Teuchos::RCP& paramList); + /** \brief . */ + Teuchos::RCP unsetParameterList(); + /** \brief . */ + Teuchos::RCP getNonconstParameterList(); + /** \brief . */ + Teuchos::RCP getParameterList() const; + /** \brief . */ + Teuchos::RCP getValidParameters() const; + //@} + + /** \name Public functions overridden from Describable. */ + //@{ + + /** \brief . */ + std::string description() const; + + // ToDo: Add an override of describe(...) to give more detail! + + //@} + + private: + Teuchos::RCP paramList_; +}; + +} // namespace Thyra + +#endif // #ifdef HAVE_MUELU_STRATIMIKOS + +#endif // THYRA_MUELU_REFMAXWELL_PRECONDITIONER_FACTORY_DECL_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_def.hpp index d79c5bb5da1e..f310be29b742 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_def.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuRefMaxwellPreconditionerFactory_def.hpp @@ -52,261 +52,252 @@ #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) // This is not as general as possible, but should be good enough for most builds. 
-#if((defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && !defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && !defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ - (!defined(HAVE_TPETRA_INST_DOUBLE) && !defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ - (defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))) -# define MUELU_CAN_USE_MIXED_PRECISION +#if ((defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && !defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && !defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ + (!defined(HAVE_TPETRA_INST_DOUBLE) && !defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT)) || \ + (defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) && defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))) +#define MUELU_CAN_USE_MIXED_PRECISION #endif - namespace Thyra { - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::ParameterList; - using Teuchos::rcp_dynamic_cast; - using Teuchos::rcp_const_cast; +using Teuchos::ParameterList; +using Teuchos::RCP; +using Teuchos::rcp; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; - // Constructors/initializers/accessors +// Constructors/initializers/accessors - template - MueLuRefMaxwellPreconditionerFactory::MueLuRefMaxwellPreconditionerFactory() : - paramList_(rcp(new ParameterList())) - {} +template +MueLuRefMaxwellPreconditionerFactory::MueLuRefMaxwellPreconditionerFactory() + : paramList_(rcp(new ParameterList())) {} - // Overridden from PreconditionerFactoryBase +// Overridden from PreconditionerFactoryBase - template - bool MueLuRefMaxwellPreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { - const RCP > fwdOp = fwdOpSrc.getOp(); +template +bool 
MueLuRefMaxwellPreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { + const RCP> fwdOp = fwdOpSrc.getOp(); - if (Xpetra::ThyraUtils::isTpetra(fwdOp)) return true; + if (Xpetra::ThyraUtils::isTpetra(fwdOp)) return true; #ifdef HAVE_MUELU_EPETRA - if (Xpetra::ThyraUtils::isEpetra(fwdOp)) return true; + if (Xpetra::ThyraUtils::isEpetra(fwdOp)) return true; #endif - return false; - } - - - template - RCP > MueLuRefMaxwellPreconditionerFactory::createPrec() const { - return Teuchos::rcp(new DefaultPreconditioner); - } - - template - void MueLuRefMaxwellPreconditionerFactory:: - initializePrec(const RCP >& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { - - // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...) - typedef Xpetra::Operator XpOp; - typedef Xpetra::ThyraUtils XpThyUtils; - typedef Xpetra::Matrix XpMat; - typedef Thyra::LinearOpBase ThyLinOpBase; - typedef Thyra::XpetraLinearOp ThyXpOp; + return false; +} + +template +RCP> MueLuRefMaxwellPreconditionerFactory::createPrec() const { + return Teuchos::rcp(new DefaultPreconditioner); +} + +template +void MueLuRefMaxwellPreconditionerFactory:: + initializePrec(const RCP>& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { + // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...) 
+ typedef Xpetra::Operator XpOp; + typedef Xpetra::ThyraUtils XpThyUtils; + typedef Xpetra::Matrix XpMat; + typedef Thyra::LinearOpBase ThyLinOpBase; + typedef Thyra::XpetraLinearOp ThyXpOp; #if defined(MUELU_CAN_USE_MIXED_PRECISION) - typedef Xpetra::TpetraHalfPrecisionOperator XpHalfPrecOp; - typedef Xpetra::MultiVector XpMV; - typedef typename XpHalfPrecOp::HalfScalar HalfScalar; - typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; - typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; - typedef Xpetra::MultiVector XphMV; - typedef Xpetra::MultiVector XpmMV; - typedef Xpetra::MultiVector XphmMV; - typedef Xpetra::Matrix XphMat; + typedef Xpetra::TpetraHalfPrecisionOperator XpHalfPrecOp; + typedef Xpetra::MultiVector XpMV; + typedef typename XpHalfPrecOp::HalfScalar HalfScalar; + typedef typename Teuchos::ScalarTraits::magnitudeType Magnitude; + typedef typename Teuchos::ScalarTraits::halfPrecision HalfMagnitude; + typedef Xpetra::MultiVector XphMV; + typedef Xpetra::MultiVector XpmMV; + typedef Xpetra::MultiVector XphmMV; + typedef Xpetra::Matrix XphMat; #endif - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("ThyraMueLuRefMaxwell::initializePrec"))); - - // Check precondition - TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); - TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); - TEUCHOS_ASSERT(prec); - - // Create a copy, as we may remove some things from the list - ParameterList paramList = *paramList_; - - // Retrieve wrapped concrete Xpetra matrix from FwdOp - const RCP fwdOp = fwdOpSrc->getOp(); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); - - // Check whether it is Epetra/Tpetra - bool bIsEpetra = XpThyUtils::isEpetra(fwdOp); - bool bIsTpetra = XpThyUtils::isTpetra(fwdOp); - TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true && bIsTpetra == true)); - - // wrap the forward operator as an Xpetra::Matrix that MueLu can work with - // MueLu needs a non-const object as input - RCP A = 
XpThyUtils::toXpetra(Teuchos::rcp_const_cast(fwdOp)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - // extract preconditioner operator - RCP thyra_precOp = Teuchos::null; - thyra_precOp = rcp_dynamic_cast >(defaultPrec->getNonconstUnspecifiedPrecOp(), true); - - // make a decision whether to (re)build the multigrid preconditioner or reuse the old one - // rebuild preconditioner if startingOver == true - // reuse preconditioner if startingOver == false - const bool startingOver = (thyra_precOp.is_null() || !paramList.isParameter("refmaxwell: enable reuse") || !paramList.get("refmaxwell: enable reuse")); - const bool useHalfPrecision = paramList.get("half precision", false) && bIsTpetra; - - RCP xpPrecOp; - if (startingOver == true) { - - // Convert to Xpetra - std::list convertMat = { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("ThyraMueLuRefMaxwell::initializePrec"))); + + // Check precondition + TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); + TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); + TEUCHOS_ASSERT(prec); + + // Create a copy, as we may remove some things from the list + ParameterList paramList = *paramList_; + + // Retrieve wrapped concrete Xpetra matrix from FwdOp + const RCP fwdOp = fwdOpSrc->getOp(); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); + + // Check whether it is Epetra/Tpetra + bool bIsEpetra = XpThyUtils::isEpetra(fwdOp); + bool bIsTpetra = XpThyUtils::isTpetra(fwdOp); + TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true && bIsTpetra == true)); + + // wrap the forward operator as an Xpetra::Matrix that MueLu can work with + // MueLu needs a non-const object as input + RCP A = XpThyUtils::toXpetra(Teuchos::rcp_const_cast(fwdOp)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); + + // Retrieve concrete preconditioner object + const 
Teuchos::Ptr> defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); + + // extract preconditioner operator + RCP thyra_precOp = Teuchos::null; + thyra_precOp = rcp_dynamic_cast>(defaultPrec->getNonconstUnspecifiedPrecOp(), true); + + // make a decision whether to (re)build the multigrid preconditioner or reuse the old one + // rebuild preconditioner if startingOver == true + // reuse preconditioner if startingOver == false + const bool startingOver = (thyra_precOp.is_null() || !paramList.isParameter("refmaxwell: enable reuse") || !paramList.get("refmaxwell: enable reuse")); + const bool useHalfPrecision = paramList.get("half precision", false) && bIsTpetra; + + RCP xpPrecOp; + if (startingOver == true) { + // Convert to Xpetra + std::list convertMat = { "Dk_1", "Dk_2", "D0", "Mk_one", "Mk_1_one", "M1_beta", "M1_alpha", "invMk_1_invBeta", "invMk_2_invAlpha", // for backwards compatibility - "M1", "Ms", "M0inv" - }; - std::list convertMV = {"Coordinates", "Nullspace"}; - std::list convertXpetra; - convertXpetra.insert(convertXpetra.end(), convertMV.begin(), convertMV.end()); - convertXpetra.insert(convertXpetra.end(), convertMat.begin(), convertMat.end()); - for (auto it = convertXpetra.begin(); it != convertXpetra.end(); ++it) - Converters::replaceWithXpetra(paramList,*it); - - paramList.set("refmaxwell: use as preconditioner", true); - if (useHalfPrecision) { + "M1", "Ms", "M0inv"}; + std::list convertMV = {"Coordinates", "Nullspace"}; + std::list convertXpetra; + convertXpetra.insert(convertXpetra.end(), convertMV.begin(), convertMV.end()); + convertXpetra.insert(convertXpetra.end(), convertMat.begin(), convertMat.end()); + for (auto it = convertXpetra.begin(); it != convertXpetra.end(); ++it) + Converters::replaceWithXpetra(paramList, *it); + + paramList.set("refmaxwell: use as preconditioner", true); + if (useHalfPrecision) { #if defined(MUELU_CAN_USE_MIXED_PRECISION) - // convert to half precision - RCP 
halfA = Xpetra::convertToHalfPrecision(A); - if (paramList.isType >("Coordinates")) { - RCP coords = paramList.get >("Coordinates"); - paramList.remove("Coordinates"); - RCP halfCoords = Xpetra::convertToHalfPrecision(coords); - paramList.set("Coordinates",halfCoords); - } - if (paramList.isType >("Nullspace")) { - RCP nullspace = paramList.get >("Nullspace"); - paramList.remove("Nullspace"); - RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); - paramList.set("Nullspace",halfNullspace); - } - for (auto it = convertMat.begin(); it != convertMat.end(); ++it) { - if (paramList.isType >(*it)) { - RCP M = paramList.get >(*it); - paramList.remove(*it); - RCP halfM = Xpetra::convertToHalfPrecision(M); - paramList.set(*it,halfM); - } + // convert to half precision + RCP halfA = Xpetra::convertToHalfPrecision(A); + if (paramList.isType>("Coordinates")) { + RCP coords = paramList.get>("Coordinates"); + paramList.remove("Coordinates"); + RCP halfCoords = Xpetra::convertToHalfPrecision(coords); + paramList.set("Coordinates", halfCoords); + } + if (paramList.isType>("Nullspace")) { + RCP nullspace = paramList.get>("Nullspace"); + paramList.remove("Nullspace"); + RCP halfNullspace = Xpetra::convertToHalfPrecision(nullspace); + paramList.set("Nullspace", halfNullspace); + } + for (auto it = convertMat.begin(); it != convertMat.end(); ++it) { + if (paramList.isType>(*it)) { + RCP M = paramList.get>(*it); + paramList.remove(*it); + RCP halfM = Xpetra::convertToHalfPrecision(M); + paramList.set(*it, halfM); } + } - // build a new half-precision MueLu RefMaxwell preconditioner - RCP > halfPrec = rcp(new MueLu::RefMaxwell(halfA, paramList, true)); - xpPrecOp = rcp(new XpHalfPrecOp(halfPrec)); + // build a new half-precision MueLu RefMaxwell preconditioner + RCP> halfPrec = rcp(new MueLu::RefMaxwell(halfA, paramList, true)); + xpPrecOp = rcp(new XpHalfPrecOp(halfPrec)); #else - TEUCHOS_TEST_FOR_EXCEPT(true); + TEUCHOS_TEST_FOR_EXCEPT(true); #endif - } else - { - // build a 
new MueLu RefMaxwell preconditioner - RCP > preconditioner = rcp(new MueLu::RefMaxwell(A, paramList, true)); - xpPrecOp = rcp_dynamic_cast(preconditioner); - } } else { - // reuse old MueLu preconditioner stored in MueLu Xpetra operator and put in new matrix + // build a new MueLu RefMaxwell preconditioner + RCP> preconditioner = rcp(new MueLu::RefMaxwell(A, paramList, true)); + xpPrecOp = rcp_dynamic_cast(preconditioner); + } + } else { + // reuse old MueLu preconditioner stored in MueLu Xpetra operator and put in new matrix - RCP thyXpOp = rcp_dynamic_cast(thyra_precOp, true); - RCP xpOp = thyXpOp->getXpetraOperator(); + RCP thyXpOp = rcp_dynamic_cast(thyra_precOp, true); + RCP xpOp = thyXpOp->getXpetraOperator(); #if defined(MUELU_CAN_USE_MIXED_PRECISION) - RCP xpHalfPrecOp = rcp_dynamic_cast(xpOp); - if (!xpHalfPrecOp.is_null()) { - RCP > preconditioner = rcp_dynamic_cast>(xpHalfPrecOp->GetHalfPrecisionOperator(), true); - RCP halfA = Xpetra::convertToHalfPrecision(A); - preconditioner->resetMatrix(halfA); - xpPrecOp = rcp_dynamic_cast(preconditioner); - } else + RCP xpHalfPrecOp = rcp_dynamic_cast(xpOp); + if (!xpHalfPrecOp.is_null()) { + RCP> preconditioner = rcp_dynamic_cast>(xpHalfPrecOp->GetHalfPrecisionOperator(), true); + RCP halfA = Xpetra::convertToHalfPrecision(A); + preconditioner->resetMatrix(halfA); + xpPrecOp = rcp_dynamic_cast(preconditioner); + } else #endif - { - RCP > preconditioner = rcp_dynamic_cast>(xpOp, true); - preconditioner->resetMatrix(A); - xpPrecOp = rcp_dynamic_cast(preconditioner); - } - } - - // wrap preconditioner in thyraPrecOp - RCP > thyraRangeSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getRangeMap()); - RCP > thyraDomainSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getDomainMap()); - - RCP thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace, xpPrecOp); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp)); - - defaultPrec->initializeUnspecified(thyraPrecOp); - - } - - template - void 
MueLuRefMaxwellPreconditionerFactory:: - uninitializePrec(PreconditionerBase* prec, RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const { - TEUCHOS_ASSERT(prec); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - if (fwdOp) { - // TODO: Implement properly instead of returning default value - *fwdOp = Teuchos::null; + { + RCP> preconditioner = rcp_dynamic_cast>(xpOp, true); + preconditioner->resetMatrix(A); + xpPrecOp = rcp_dynamic_cast(preconditioner); } - - if (supportSolveUse) { - // TODO: Implement properly instead of returning default value - *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; - } - - defaultPrec->uninitialize(); - } - - - // Overridden from ParameterListAcceptor - template - void MueLuRefMaxwellPreconditionerFactory::setParameterList(RCP const& paramList) { - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); - paramList_ = paramList; } - template - RCP MueLuRefMaxwellPreconditionerFactory::getNonconstParameterList() { - return paramList_; - } + // wrap preconditioner in thyraPrecOp + RCP> thyraRangeSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getRangeMap()); + RCP> thyraDomainSpace = Xpetra::ThyraUtils::toThyra(xpPrecOp->getDomainMap()); - template - RCP MueLuRefMaxwellPreconditionerFactory::unsetParameterList() { - RCP savedParamList = paramList_; - paramList_ = Teuchos::null; - return savedParamList; - } + RCP thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace, xpPrecOp); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp)); - template - RCP MueLuRefMaxwellPreconditionerFactory::getParameterList() const { - return paramList_; - } + defaultPrec->initializeUnspecified(thyraPrecOp); +} - template - RCP MueLuRefMaxwellPreconditionerFactory::getValidParameters() const { - static RCP validPL; +template +void MueLuRefMaxwellPreconditionerFactory:: + uninitializePrec(PreconditionerBase* 
prec, RCP>* fwdOp, ESupportSolveUse* supportSolveUse) const { + TEUCHOS_ASSERT(prec); - if (Teuchos::is_null(validPL)) - validPL = rcp(new ParameterList()); + // Retrieve concrete preconditioner object + const Teuchos::Ptr> defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - return validPL; + if (fwdOp) { + // TODO: Implement properly instead of returning default value + *fwdOp = Teuchos::null; } - // Public functions overridden from Teuchos::Describable - template - std::string MueLuRefMaxwellPreconditionerFactory::description() const { - return "Thyra::MueLuRefMaxwellPreconditionerFactory"; + if (supportSolveUse) { + // TODO: Implement properly instead of returning default value + *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; } -} // namespace Thyra - -#endif // HAVE_MUELU_STRATIMIKOS -#endif // ifdef THYRA_MUELU_REFMAXWELL_PRECONDITIONER_FACTORY_DEF_HPP + defaultPrec->uninitialize(); +} + +// Overridden from ParameterListAcceptor +template +void MueLuRefMaxwellPreconditionerFactory::setParameterList(RCP const& paramList) { + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); + paramList_ = paramList; +} + +template +RCP MueLuRefMaxwellPreconditionerFactory::getNonconstParameterList() { + return paramList_; +} + +template +RCP MueLuRefMaxwellPreconditionerFactory::unsetParameterList() { + RCP savedParamList = paramList_; + paramList_ = Teuchos::null; + return savedParamList; +} + +template +RCP MueLuRefMaxwellPreconditionerFactory::getParameterList() const { + return paramList_; +} + +template +RCP MueLuRefMaxwellPreconditionerFactory::getValidParameters() const { + static RCP validPL; + + if (Teuchos::is_null(validPL)) + validPL = rcp(new ParameterList()); + + return validPL; +} + +// Public functions overridden from Teuchos::Describable +template +std::string MueLuRefMaxwellPreconditionerFactory::description() const { + return "Thyra::MueLuRefMaxwellPreconditionerFactory"; +} +} // namespace 
Thyra + +#endif // HAVE_MUELU_STRATIMIKOS + +#endif // ifdef THYRA_MUELU_REFMAXWELL_PRECONDITIONER_FACTORY_DEF_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_decl.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_decl.hpp index 2f73ff9c52cd..cffc273eb42c 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_decl.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_decl.hpp @@ -47,7 +47,6 @@ #define THYRA_MUELU_TPETRA_Q2Q1PRECONDITIONER_FACTORY_DECL_HPP #ifdef HAVE_MUELU_EXPERIMENTAL - #include "Thyra_PreconditionerFactoryBase.hpp" #include @@ -59,86 +58,83 @@ namespace Thyra { - /** \brief Concrete preconditioner factory subclass based on MueLu. - * - * ToDo: Finish documentation! - */ - template - class MueLuTpetraQ2Q1PreconditionerFactory : public PreconditionerFactoryBase { - private: - typedef Scalar SC; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef Node NO; - - public: - - /** @name Constructors/initializers/accessors */ - //@{ - - /** \brief . */ - MueLuTpetraQ2Q1PreconditionerFactory(); - //@} - - /** @name Overridden from PreconditionerFactoryBase */ - //@{ - - /** \brief . */ - bool isCompatible( const LinearOpSourceBase &fwdOp ) const; - /** \brief . */ - Teuchos::RCP > createPrec() const; - /** \brief . */ - void initializePrec(const Teuchos::RCP > &fwdOp, PreconditionerBase *prec, const ESupportSolveUse supportSolveUse) const; - /** \brief . */ - void uninitializePrec(PreconditionerBase *prec, Teuchos::RCP > *fwdOp, ESupportSolveUse *supportSolveUse) const; - //@} - - /** @name Overridden from Teuchos::ParameterListAcceptor */ - //@{ - - /** \brief . */ - void setParameterList(const Teuchos::RCP& paramList); - /** \brief . */ - Teuchos::RCP unsetParameterList(); - /** \brief . */ - Teuchos::RCP getNonconstParameterList(); - /** \brief . 
*/ - Teuchos::RCP getParameterList() const; - /** \brief . */ - Teuchos::RCP getValidParameters() const; - //@} - - /** \name Public functions overridden from Describable. */ - //@{ - - /** \brief . */ - std::string description() const; - - // ToDo: Add an override of describe(...) to give more detail! - - //@} - - private: - - Teuchos::RCP > - Q2Q1MkPrecond(const ParameterList& paramList, - const Teuchos::RCP >& velCoords, - const Teuchos::RCP >& presCoords, - const Teuchos::ArrayRCP& p2vMap, - const Teko::LinearOp& thA11, const Teko::LinearOp& thA12, const Teko::LinearOp& thA21, const Teko::LinearOp& thA11_9Pt) const; - - Teuchos::RCP > Absolute (const Xpetra::Matrix& A) const; - Teuchos::RCP > FilterMatrix(Xpetra::Matrix& A, Xpetra::Matrix& Pattern, SC dropTol) const; - - void SetDependencyTree (MueLu::FactoryManager& M, const ParameterList& paramList) const; - void SetBlockDependencyTree(MueLu::FactoryManager& M, LO row, LO col, const std::string& mode, const ParameterList& paramList) const; - - RCP GetSmoother(const std::string& type, const ParameterList& paramList, bool coarseSolver) const; - - Teuchos::RCP paramList_; - - }; - -} // namespace Thyra +/** \brief Concrete preconditioner factory subclass based on MueLu. + * + * ToDo: Finish documentation! + */ +template +class MueLuTpetraQ2Q1PreconditionerFactory : public PreconditionerFactoryBase { + private: + typedef Scalar SC; + typedef LocalOrdinal LO; + typedef GlobalOrdinal GO; + typedef Node NO; + + public: + /** @name Constructors/initializers/accessors */ + //@{ + + /** \brief . */ + MueLuTpetraQ2Q1PreconditionerFactory(); + //@} + + /** @name Overridden from PreconditionerFactoryBase */ + //@{ + + /** \brief . */ + bool isCompatible(const LinearOpSourceBase& fwdOp) const; + /** \brief . */ + Teuchos::RCP > createPrec() const; + /** \brief . */ + void initializePrec(const Teuchos::RCP >& fwdOp, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const; + /** \brief . 
*/ + void uninitializePrec(PreconditionerBase* prec, Teuchos::RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const; + //@} + + /** @name Overridden from Teuchos::ParameterListAcceptor */ + //@{ + + /** \brief . */ + void setParameterList(const Teuchos::RCP& paramList); + /** \brief . */ + Teuchos::RCP unsetParameterList(); + /** \brief . */ + Teuchos::RCP getNonconstParameterList(); + /** \brief . */ + Teuchos::RCP getParameterList() const; + /** \brief . */ + Teuchos::RCP getValidParameters() const; + //@} + + /** \name Public functions overridden from Describable. */ + //@{ + + /** \brief . */ + std::string description() const; + + // ToDo: Add an override of describe(...) to give more detail! + + //@} + + private: + Teuchos::RCP > + Q2Q1MkPrecond(const ParameterList& paramList, + const Teuchos::RCP >& velCoords, + const Teuchos::RCP >& presCoords, + const Teuchos::ArrayRCP& p2vMap, + const Teko::LinearOp& thA11, const Teko::LinearOp& thA12, const Teko::LinearOp& thA21, const Teko::LinearOp& thA11_9Pt) const; + + Teuchos::RCP > Absolute(const Xpetra::Matrix& A) const; + Teuchos::RCP > FilterMatrix(Xpetra::Matrix& A, Xpetra::Matrix& Pattern, SC dropTol) const; + + void SetDependencyTree(MueLu::FactoryManager& M, const ParameterList& paramList) const; + void SetBlockDependencyTree(MueLu::FactoryManager& M, LO row, LO col, const std::string& mode, const ParameterList& paramList) const; + + RCP GetSmoother(const std::string& type, const ParameterList& paramList, bool coarseSolver) const; + + Teuchos::RCP paramList_; +}; + +} // namespace Thyra #endif -#endif // THYRA_MUELU_TPETRA_Q2Q1PRECONDITIONER_FACTORY_DECL_HPP +#endif // THYRA_MUELU_TPETRA_Q2Q1PRECONDITIONER_FACTORY_DECL_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp index 0c69bd698027..44c70302b9e3 100644 --- 
a/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp @@ -105,249 +105,272 @@ namespace Thyra { #define MUELU_GPD(name, type, defaultValue) \ (paramList.isParameter(name) ? paramList.get(name) : defaultValue) - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::rcp_const_cast; - using Teuchos::rcp_dynamic_cast; - using Teuchos::ParameterList; - using Teuchos::ArrayView; - using Teuchos::ArrayRCP; - using Teuchos::as; - using Teuchos::Array; - - // Constructors/initializers/accessors - template - MueLuTpetraQ2Q1PreconditionerFactory::MueLuTpetraQ2Q1PreconditionerFactory() {} - - - // Overridden from PreconditionerFactoryBase - template - bool MueLuTpetraQ2Q1PreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { - typedef Thyra ::TpetraLinearOp ThyraTpetraLinOp; - typedef Tpetra::Operator TpetraLinOp; - typedef Tpetra::CrsMatrix TpetraCrsMat; - - const RCP > fwdOp = fwdOpSrc.getOp(); - const RCP thyraTpetraFwdOp = rcp_dynamic_cast(fwdOp); - const RCP tpetraFwdOp = Teuchos::nonnull(thyraTpetraFwdOp) ? 
thyraTpetraFwdOp->getConstTpetraOperator() : Teuchos::null; - const RCP tpetraFwdCrsMat = rcp_dynamic_cast(tpetraFwdOp); - - return Teuchos::nonnull(tpetraFwdCrsMat); +using Teuchos::Array; +using Teuchos::ArrayRCP; +using Teuchos::ArrayView; +using Teuchos::as; +using Teuchos::ParameterList; +using Teuchos::RCP; +using Teuchos::rcp; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; + +// Constructors/initializers/accessors +template +MueLuTpetraQ2Q1PreconditionerFactory::MueLuTpetraQ2Q1PreconditionerFactory() {} + +// Overridden from PreconditionerFactoryBase +template +bool MueLuTpetraQ2Q1PreconditionerFactory::isCompatible(const LinearOpSourceBase& fwdOpSrc) const { + typedef Thyra ::TpetraLinearOp ThyraTpetraLinOp; + typedef Tpetra::Operator TpetraLinOp; + typedef Tpetra::CrsMatrix TpetraCrsMat; + + const RCP > fwdOp = fwdOpSrc.getOp(); + const RCP thyraTpetraFwdOp = rcp_dynamic_cast(fwdOp); + const RCP tpetraFwdOp = Teuchos::nonnull(thyraTpetraFwdOp) ? thyraTpetraFwdOp->getConstTpetraOperator() : Teuchos::null; + const RCP tpetraFwdCrsMat = rcp_dynamic_cast(tpetraFwdOp); + + return Teuchos::nonnull(tpetraFwdCrsMat); +} + +template +RCP > +MueLuTpetraQ2Q1PreconditionerFactory::createPrec() const { + return rcp(new DefaultPreconditioner); +} + +template +void MueLuTpetraQ2Q1PreconditionerFactory:: + initializePrec(const RCP >& fwdOpSrc, PreconditionerBase* prec, const ESupportSolveUse supportSolveUse) const { + // Check precondition + TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); + TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); + TEUCHOS_ASSERT(prec); + + // Retrieve wrapped concrete Tpetra matrix from FwdOp + const RCP > fwdOp = fwdOpSrc->getOp(); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); + + typedef Thyra::TpetraLinearOp ThyraTpetraLinOp; + const RCP thyraTpetraFwdOp = rcp_dynamic_cast(fwdOp); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraTpetraFwdOp)); + + typedef Tpetra::Operator TpetraLinOp; + const RCP tpetraFwdOp = 
thyraTpetraFwdOp->getConstTpetraOperator(); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(tpetraFwdOp)); + + typedef Tpetra::CrsMatrix TpetraCrsMat; + const RCP tpetraFwdCrsMat = rcp_dynamic_cast(tpetraFwdOp); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(tpetraFwdCrsMat)); + + // Retrieve concrete preconditioner object + const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); + + // Workaround since MueLu interface does not accept const matrix as input + const RCP tpetraFwdCrsMatNonConst = rcp_const_cast(tpetraFwdCrsMat); + + // Create and compute the initial preconditioner + + // Create a copy, as we may remove some things from the list + ParameterList paramList = *paramList_; + + typedef Tpetra::MultiVector MultiVector; + RCP coords, nullspace, velCoords, presCoords; + ArrayRCP p2vMap; + Teko::LinearOp thA11, thA12, thA21, thA11_9Pt; + if (paramList.isType >("Coordinates")) { + coords = paramList.get >("Coordinates"); + paramList.remove("Coordinates"); } - - template - RCP > - MueLuTpetraQ2Q1PreconditionerFactory::createPrec() const { - return rcp(new DefaultPreconditioner); + if (paramList.isType >("Nullspace")) { + nullspace = paramList.get >("Nullspace"); + paramList.remove("Nullspace"); } - - template - void MueLuTpetraQ2Q1PreconditionerFactory:: - initializePrec(const RCP > &fwdOpSrc, PreconditionerBase *prec, const ESupportSolveUse supportSolveUse) const { - // Check precondition - TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc)); - TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc)); - TEUCHOS_ASSERT(prec); - - // Retrieve wrapped concrete Tpetra matrix from FwdOp - const RCP > fwdOp = fwdOpSrc->getOp(); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp)); - - typedef Thyra::TpetraLinearOp ThyraTpetraLinOp; - const RCP thyraTpetraFwdOp = rcp_dynamic_cast(fwdOp); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraTpetraFwdOp)); - - typedef Tpetra::Operator TpetraLinOp; - const RCP tpetraFwdOp = 
thyraTpetraFwdOp->getConstTpetraOperator(); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(tpetraFwdOp)); - - typedef Tpetra::CrsMatrix TpetraCrsMat; - const RCP tpetraFwdCrsMat = rcp_dynamic_cast(tpetraFwdOp); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(tpetraFwdCrsMat)); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - // Workaround since MueLu interface does not accept const matrix as input - const RCP tpetraFwdCrsMatNonConst = rcp_const_cast(tpetraFwdCrsMat); - - // Create and compute the initial preconditioner - - // Create a copy, as we may remove some things from the list - ParameterList paramList = *paramList_; - - typedef Tpetra::MultiVector MultiVector; - RCP coords, nullspace, velCoords, presCoords; - ArrayRCP p2vMap; - Teko::LinearOp thA11, thA12, thA21, thA11_9Pt; - if (paramList.isType >("Coordinates")) { coords = paramList.get >("Coordinates"); paramList.remove("Coordinates"); } - if (paramList.isType >("Nullspace")) { nullspace = paramList.get >("Nullspace"); paramList.remove("Nullspace"); } - if (paramList.isType >("Velcoords")) { velCoords = paramList.get >("Velcoords"); paramList.remove("Velcoords"); } - if (paramList.isType >("Prescoords")) { presCoords = paramList.get >("Prescoords"); paramList.remove("Prescoords"); } - if (paramList.isType > ("p2vMap")) { p2vMap = paramList.get > ("p2vMap"); paramList.remove("p2vMap"); } - if (paramList.isType ("A11")) { thA11 = paramList.get ("A11"); paramList.remove("A11"); } - if (paramList.isType ("A12")) { thA12 = paramList.get ("A12"); paramList.remove("A12"); } - if (paramList.isType ("A21")) { thA21 = paramList.get ("A21"); paramList.remove("A21"); } - if (paramList.isType ("A11_9Pt")) { thA11_9Pt = paramList.get ("A11_9Pt"); paramList.remove("A11_9Pt"); } - - typedef MueLu::TpetraOperator MueLuOperator; - const RCP mueluPrecOp = Q2Q1MkPrecond(paramList, velCoords, 
presCoords, p2vMap, thA11, thA12, thA21, thA11_9Pt); - - const RCP > thyraPrecOp = Thyra::createLinearOp(RCP(mueluPrecOp)); - defaultPrec->initializeUnspecified(thyraPrecOp); + if (paramList.isType >("Velcoords")) { + velCoords = paramList.get >("Velcoords"); + paramList.remove("Velcoords"); } - - template - void MueLuTpetraQ2Q1PreconditionerFactory:: - uninitializePrec(PreconditionerBase *prec, RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const { - // Check precondition - TEUCHOS_ASSERT(prec); - - // Retrieve concrete preconditioner object - const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast *>(prec)); - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - - if (fwdOp) { - // TODO: Implement properly instead of returning default value - *fwdOp = Teuchos::null; - } - - if (supportSolveUse) { - // TODO: Implement properly instead of returning default value - *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; - } - - defaultPrec->uninitialize(); + if (paramList.isType >("Prescoords")) { + presCoords = paramList.get >("Prescoords"); + paramList.remove("Prescoords"); } - - - // Overridden from ParameterListAcceptor - template - void MueLuTpetraQ2Q1PreconditionerFactory::setParameterList(const RCP& paramList) { - TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); - paramList_ = paramList; + if (paramList.isType >("p2vMap")) { + p2vMap = paramList.get >("p2vMap"); + paramList.remove("p2vMap"); } - - template - RCP - MueLuTpetraQ2Q1PreconditionerFactory::getNonconstParameterList() { - return paramList_; + if (paramList.isType("A11")) { + thA11 = paramList.get("A11"); + paramList.remove("A11"); + } + if (paramList.isType("A12")) { + thA12 = paramList.get("A12"); + paramList.remove("A12"); + } + if (paramList.isType("A21")) { + thA21 = paramList.get("A21"); + paramList.remove("A21"); + } + if (paramList.isType("A11_9Pt")) { + thA11_9Pt = paramList.get("A11_9Pt"); + paramList.remove("A11_9Pt"); } + typedef MueLu::TpetraOperator MueLuOperator; + const 
RCP mueluPrecOp = Q2Q1MkPrecond(paramList, velCoords, presCoords, p2vMap, thA11, thA12, thA21, thA11_9Pt); - template - RCP - MueLuTpetraQ2Q1PreconditionerFactory::unsetParameterList() { - RCP savedParamList = paramList_; - paramList_ = Teuchos::null; - return savedParamList; - } + const RCP > thyraPrecOp = Thyra::createLinearOp(RCP(mueluPrecOp)); + defaultPrec->initializeUnspecified(thyraPrecOp); +} - template - RCP - MueLuTpetraQ2Q1PreconditionerFactory::getParameterList() const { - return paramList_; - } +template +void MueLuTpetraQ2Q1PreconditionerFactory:: + uninitializePrec(PreconditionerBase* prec, RCP >* fwdOp, ESupportSolveUse* supportSolveUse) const { + // Check precondition + TEUCHOS_ASSERT(prec); - template - RCP - MueLuTpetraQ2Q1PreconditionerFactory::getValidParameters() const { - static RCP validPL; + // Retrieve concrete preconditioner object + const Teuchos::Ptr > defaultPrec = Teuchos::ptr(dynamic_cast*>(prec)); + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec)); - if (validPL.is_null()) - validPL = rcp(new ParameterList()); + if (fwdOp) { + // TODO: Implement properly instead of returning default value + *fwdOp = Teuchos::null; + } - return validPL; + if (supportSolveUse) { + // TODO: Implement properly instead of returning default value + *supportSolveUse = Thyra::SUPPORT_SOLVE_UNSPECIFIED; } - template - RCP > - MueLuTpetraQ2Q1PreconditionerFactory:: - Q2Q1MkPrecond(const ParameterList& paramList, - const RCP >& velCoords, - const RCP >& presCoords, - const ArrayRCP& p2vMap, - const Teko::LinearOp& thA11, const Teko::LinearOp& thA12, const Teko::LinearOp& thA21, const Teko::LinearOp& thA11_9Pt) const - { - using Teuchos::null; - - typedef Tpetra::CrsMatrix TP_Crs; - typedef Tpetra::Operator TP_Op; - - typedef Xpetra::BlockedCrsMatrix BlockedCrsMatrix; - typedef Xpetra::CrsMatrix CrsMatrix; - typedef Xpetra::CrsMatrixWrap CrsMatrixWrap; - typedef Xpetra::MapExtractorFactory MapExtractorFactory; - typedef Xpetra::MapExtractor MapExtractor; 
- typedef Xpetra::Map Map; - typedef Xpetra::MapFactory MapFactory; - typedef Xpetra::Matrix Matrix; - typedef Xpetra::MatrixFactory MatrixFactory; - typedef Xpetra::StridedMapFactory StridedMapFactory; - - typedef MueLu::Hierarchy Hierarchy; - - const RCP > comm = velCoords->getMap()->getComm(); - - // Pull out Tpetra matrices - RCP > ThNonConstA11 = rcp_const_cast >(thA11); - RCP > ThNonConstA21 = rcp_const_cast >(thA21); - RCP > ThNonConstA12 = rcp_const_cast >(thA12); - RCP > ThNonConstA11_9Pt = rcp_const_cast >(thA11_9Pt); - - RCP TpetA11 = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA11); - RCP TpetA21 = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA21); - RCP TpetA12 = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA12); - RCP TpetA11_9Pt = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA11_9Pt); - - RCP TpetCrsA11 = rcp_dynamic_cast(TpetA11); - RCP TpetCrsA21 = rcp_dynamic_cast(TpetA21); - RCP TpetCrsA12 = rcp_dynamic_cast(TpetA12); - RCP TpetCrsA11_9Pt = rcp_dynamic_cast(TpetA11_9Pt); - - RCP A_11 = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA11); - RCP tmp_A_21 = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA21); // needs map modification - RCP tmp_A_12 = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA12); // needs map modification - RCP A_11_9Pt = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA11_9Pt); - - Xpetra::global_size_t numVel = A_11->getRowMap()->getLocalNumElements(); - Xpetra::global_size_t numPres = tmp_A_21->getRowMap()->getLocalNumElements(); - - // Create new A21 with map so that the global indices of the row map starts - // from numVel+1 (where numVel is the number of rows in the A11 block) - RCP domainMap2 = tmp_A_12->getDomainMap(); - RCP rangeMap2 = tmp_A_21->getRangeMap(); - Xpetra::global_size_t numRows2 = rangeMap2->getLocalNumElements(); - Xpetra::global_size_t numCols2 = domainMap2->getLocalNumElements(); - ArrayView rangeElem2 = rangeMap2->getLocalElementList(); - 
ArrayView domainElem2 = domainMap2->getLocalElementList(); - ArrayView rowElem1 = tmp_A_12->getRowMap()->getLocalElementList(); - ArrayView colElem1 = tmp_A_21->getColMap()->getLocalElementList(); - - Xpetra::UnderlyingLib lib = domainMap2->lib(); - GO indexBase = domainMap2->getIndexBase(); - - Array newRowElem2(numRows2, 0); - for (Xpetra::global_size_t i = 0; i < numRows2; i++) - newRowElem2[i] = numVel + rangeElem2[i]; - - RCP newRangeMap2 = MapFactory::Build(lib, numRows2, newRowElem2, indexBase, comm); - - // maybe should be column map??? - Array newColElem2(numCols2, 0); - for (Xpetra::global_size_t i = 0; i < numCols2; i++) - newColElem2[i] = numVel + domainElem2[i]; - - RCP newDomainMap2 = MapFactory::Build(lib, numCols2, newColElem2, indexBase, comm); - - RCP A_12 = MatrixFactory::Build(tmp_A_12->getRangeMap(), newDomainMap2, tmp_A_12->getLocalMaxNumRowEntries()); - RCP A_21 = MatrixFactory::Build(newRangeMap2, tmp_A_21->getDomainMap(), tmp_A_21->getLocalMaxNumRowEntries()); - - RCP A_11_crs = rcp_dynamic_cast(A_11) ->getCrsMatrix(); - RCP A_12_crs = rcp_dynamic_cast(A_12) ->getCrsMatrix(); - RCP A_21_crs = rcp_dynamic_cast(A_21) ->getCrsMatrix(); - RCP A_11_crs_9Pt = rcp_dynamic_cast(A_11_9Pt)->getCrsMatrix(); + defaultPrec->uninitialize(); +} + +// Overridden from ParameterListAcceptor +template +void MueLuTpetraQ2Q1PreconditionerFactory::setParameterList(const RCP& paramList) { + TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(paramList)); + paramList_ = paramList; +} + +template +RCP +MueLuTpetraQ2Q1PreconditionerFactory::getNonconstParameterList() { + return paramList_; +} + +template +RCP +MueLuTpetraQ2Q1PreconditionerFactory::unsetParameterList() { + RCP savedParamList = paramList_; + paramList_ = Teuchos::null; + return savedParamList; +} + +template +RCP +MueLuTpetraQ2Q1PreconditionerFactory::getParameterList() const { + return paramList_; +} + +template +RCP +MueLuTpetraQ2Q1PreconditionerFactory::getValidParameters() const { + static RCP validPL; + + 
if (validPL.is_null()) + validPL = rcp(new ParameterList()); + + return validPL; +} + +template +RCP > +MueLuTpetraQ2Q1PreconditionerFactory:: + Q2Q1MkPrecond(const ParameterList& paramList, + const RCP >& velCoords, + const RCP >& presCoords, + const ArrayRCP& p2vMap, + const Teko::LinearOp& thA11, const Teko::LinearOp& thA12, const Teko::LinearOp& thA21, const Teko::LinearOp& thA11_9Pt) const { + using Teuchos::null; + + typedef Tpetra::CrsMatrix TP_Crs; + typedef Tpetra::Operator TP_Op; + + typedef Xpetra::BlockedCrsMatrix BlockedCrsMatrix; + typedef Xpetra::CrsMatrix CrsMatrix; + typedef Xpetra::CrsMatrixWrap CrsMatrixWrap; + typedef Xpetra::MapExtractorFactory MapExtractorFactory; + typedef Xpetra::MapExtractor MapExtractor; + typedef Xpetra::Map Map; + typedef Xpetra::MapFactory MapFactory; + typedef Xpetra::Matrix Matrix; + typedef Xpetra::MatrixFactory MatrixFactory; + typedef Xpetra::StridedMapFactory StridedMapFactory; + + typedef MueLu::Hierarchy Hierarchy; + + const RCP > comm = velCoords->getMap()->getComm(); + + // Pull out Tpetra matrices + RCP > ThNonConstA11 = rcp_const_cast >(thA11); + RCP > ThNonConstA21 = rcp_const_cast >(thA21); + RCP > ThNonConstA12 = rcp_const_cast >(thA12); + RCP > ThNonConstA11_9Pt = rcp_const_cast >(thA11_9Pt); + + RCP TpetA11 = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA11); + RCP TpetA21 = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA21); + RCP TpetA12 = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA12); + RCP TpetA11_9Pt = Thyra::TpetraOperatorVectorExtraction::getTpetraOperator(ThNonConstA11_9Pt); + + RCP TpetCrsA11 = rcp_dynamic_cast(TpetA11); + RCP TpetCrsA21 = rcp_dynamic_cast(TpetA21); + RCP TpetCrsA12 = rcp_dynamic_cast(TpetA12); + RCP TpetCrsA11_9Pt = rcp_dynamic_cast(TpetA11_9Pt); + + RCP A_11 = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA11); + RCP tmp_A_21 = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA21); // needs map modification + RCP tmp_A_12 
= MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA12); // needs map modification + RCP A_11_9Pt = MueLu::TpetraCrs_To_XpetraMatrix(TpetCrsA11_9Pt); + + Xpetra::global_size_t numVel = A_11->getRowMap()->getLocalNumElements(); + Xpetra::global_size_t numPres = tmp_A_21->getRowMap()->getLocalNumElements(); + + // Create new A21 with map so that the global indices of the row map starts + // from numVel+1 (where numVel is the number of rows in the A11 block) + RCP domainMap2 = tmp_A_12->getDomainMap(); + RCP rangeMap2 = tmp_A_21->getRangeMap(); + Xpetra::global_size_t numRows2 = rangeMap2->getLocalNumElements(); + Xpetra::global_size_t numCols2 = domainMap2->getLocalNumElements(); + ArrayView rangeElem2 = rangeMap2->getLocalElementList(); + ArrayView domainElem2 = domainMap2->getLocalElementList(); + ArrayView rowElem1 = tmp_A_12->getRowMap()->getLocalElementList(); + ArrayView colElem1 = tmp_A_21->getColMap()->getLocalElementList(); + + Xpetra::UnderlyingLib lib = domainMap2->lib(); + GO indexBase = domainMap2->getIndexBase(); + + Array newRowElem2(numRows2, 0); + for (Xpetra::global_size_t i = 0; i < numRows2; i++) + newRowElem2[i] = numVel + rangeElem2[i]; + + RCP newRangeMap2 = MapFactory::Build(lib, numRows2, newRowElem2, indexBase, comm); + + // maybe should be column map??? 
+ Array newColElem2(numCols2, 0); + for (Xpetra::global_size_t i = 0; i < numCols2; i++) + newColElem2[i] = numVel + domainElem2[i]; + + RCP newDomainMap2 = MapFactory::Build(lib, numCols2, newColElem2, indexBase, comm); + + RCP A_12 = MatrixFactory::Build(tmp_A_12->getRangeMap(), newDomainMap2, tmp_A_12->getLocalMaxNumRowEntries()); + RCP A_21 = MatrixFactory::Build(newRangeMap2, tmp_A_21->getDomainMap(), tmp_A_21->getLocalMaxNumRowEntries()); + + RCP A_11_crs = rcp_dynamic_cast(A_11)->getCrsMatrix(); + RCP A_12_crs = rcp_dynamic_cast(A_12)->getCrsMatrix(); + RCP A_21_crs = rcp_dynamic_cast(A_21)->getCrsMatrix(); + RCP A_11_crs_9Pt = rcp_dynamic_cast(A_11_9Pt)->getCrsMatrix(); #if 0 RCP A_22 = MatrixFactory::Build(newRangeMap2, newDomainMap2, 1); @@ -375,109 +398,109 @@ namespace Thyra { A_21_crs->fillComplete(tmp_A_21->getDomainMap(), newRangeMap2); A_22_crs->fillComplete(newDomainMap2, newRangeMap2); #else - RCP A_22 = Teuchos::null; - RCP A_22_crs = Teuchos::null; + RCP A_22 = Teuchos::null; + RCP A_22_crs = Teuchos::null; - ArrayView inds; - ArrayView vals; - for (LO row = 0; row < as(numRows2); ++row) { - tmp_A_21->getLocalRowView(row, inds, vals); + ArrayView inds; + ArrayView vals; + for (LO row = 0; row < as(numRows2); ++row) { + tmp_A_21->getLocalRowView(row, inds, vals); - size_t nnz = inds.size(); - Array newInds(nnz, 0); - for (LO colID = 0; colID < as(nnz); colID++) - newInds[colID] = colElem1[inds[colID]]; + size_t nnz = inds.size(); + Array newInds(nnz, 0); + for (LO colID = 0; colID < as(nnz); colID++) + newInds[colID] = colElem1[inds[colID]]; - A_21_crs->insertGlobalValues(newRowElem2[row], newInds, vals); - } - A_21_crs->fillComplete(tmp_A_21->getDomainMap(), newRangeMap2); + A_21_crs->insertGlobalValues(newRowElem2[row], newInds, vals); + } + A_21_crs->fillComplete(tmp_A_21->getDomainMap(), newRangeMap2); #endif - // Create new A12 with map so that the global indices of the ColMap starts - // from numVel+1 (where numVel is the number of rows in 
the A11 block) - for (LO row = 0; row < as(tmp_A_12->getRowMap()->getLocalNumElements()); ++row) { - tmp_A_12->getLocalRowView(row, inds, vals); + // Create new A12 with map so that the global indices of the ColMap starts + // from numVel+1 (where numVel is the number of rows in the A11 block) + for (LO row = 0; row < as(tmp_A_12->getRowMap()->getLocalNumElements()); ++row) { + tmp_A_12->getLocalRowView(row, inds, vals); - size_t nnz = inds.size(); - Array newInds(nnz, 0); - for (LO colID = 0; colID < as(nnz); colID++) - newInds[colID] = newColElem2[inds[colID]]; + size_t nnz = inds.size(); + Array newInds(nnz, 0); + for (LO colID = 0; colID < as(nnz); colID++) + newInds[colID] = newColElem2[inds[colID]]; - A_12_crs->insertGlobalValues(rowElem1[row], newInds, vals); - } - A_12_crs->fillComplete(newDomainMap2, tmp_A_12->getRangeMap()); - - RCP A_12_abs = Absolute(*A_12); - RCP A_21_abs = Absolute(*A_21); - - // ========================================================================= - // Preconditioner construction - I (block) - // ========================================================================= - RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; - out.setOutputToRootOnly(0); - RCP BBt = Xpetra::MatrixMatrix::Multiply(*A_21, false, *A_12, false, out); - RCP BBt_abs = Xpetra::MatrixMatrix::Multiply(*A_21_abs, false, *A_12_abs, false, out); - - SC dropTol = (paramList.get("useFilters") ? paramList.get("tau_1") : 0.00); - RCP filteredA = FilterMatrix(*A_11, *A_11, dropTol); - RCP filteredB = FilterMatrix(*BBt, *BBt_abs, dropTol); - - RCP fA_11_crs = rcp_dynamic_cast(filteredA); - RCP fA_12_crs = Teuchos::null; - RCP fA_21_crs = Teuchos::null; - RCP fA_22_crs = rcp_dynamic_cast(filteredB); - - // Build the large filtered matrix which requires strided maps - std::vector stridingInfo(1, 1); - int stridedBlockId = -1; - - Array elementList(numVel+numPres); // Not RCP ... does this get cleared ? 
- Array velElem = A_12_crs->getRangeMap()->getLocalElementList(); - Array presElem = A_21_crs->getRangeMap()->getLocalElementList(); - - for (Xpetra::global_size_t i = 0 ; i < numVel; i++) elementList[i] = velElem[i]; - for (Xpetra::global_size_t i = numVel; i < numVel+numPres; i++) elementList[i] = presElem[i-numVel]; - RCP fullMap = StridedMapFactory::Build(Xpetra::UseTpetra, numVel+numPres, elementList(), indexBase, stridingInfo, comm); - - std::vector > partMaps(2); - partMaps[0] = StridedMapFactory::Build(Xpetra::UseTpetra, numVel, velElem, indexBase, stridingInfo, comm); - partMaps[1] = StridedMapFactory::Build(Xpetra::UseTpetra, numPres, presElem, indexBase, stridingInfo, comm, stridedBlockId, numVel); - - // Map extractors are necessary for Xpetra's block operators - RCP mapExtractor = MapExtractorFactory::Build(fullMap, partMaps); - RCP fA = rcp(new BlockedCrsMatrix(mapExtractor, mapExtractor, 10)); - fA->setMatrix(0, 0, fA_11_crs); - fA->setMatrix(0, 1, fA_12_crs); - fA->setMatrix(1, 0, fA_21_crs); - fA->setMatrix(1, 1, fA_22_crs); - fA->fillComplete(); - - // ------------------------------------------------------------------------- - // Preconditioner construction - I.a (filtered hierarchy) - // ------------------------------------------------------------------------- - MueLu::FactoryManager M; - SetDependencyTree(M, paramList); - - RCP H = rcp(new Hierarchy); - RCP finestLevel = H->GetLevel(0); - finestLevel->Set("A", rcp_dynamic_cast(fA)); - finestLevel->Set("p2vMap", p2vMap); - finestLevel->Set("CoordinatesVelocity", Xpetra::toXpetra(velCoords)); - finestLevel->Set("CoordinatesPressure", Xpetra::toXpetra(presCoords)); - finestLevel->Set("AForPat", A_11_9Pt); - H->SetMaxCoarseSize(MUELU_GPD("coarse: max size", int, 1)); - - // The first invocation of Setup() builds the hierarchy using the filtered - // matrix. This build includes the grid transfers but not the creation of the - // smoothers. 
- // NOTE: we need to indicate what should be kept from the first invocation - // for the second invocation, which then focuses on building the smoothers - // for the unfiltered matrix. - H->Keep("P", M.GetFactory("P") .get()); - H->Keep("R", M.GetFactory("R") .get()); - H->Keep("Ptent", M.GetFactory("Ptent").get()); - H->Setup(M, 0, MUELU_GPD("max levels", int, 3)); + A_12_crs->insertGlobalValues(rowElem1[row], newInds, vals); + } + A_12_crs->fillComplete(newDomainMap2, tmp_A_12->getRangeMap()); + + RCP A_12_abs = Absolute(*A_12); + RCP A_21_abs = Absolute(*A_21); + + // ========================================================================= + // Preconditioner construction - I (block) + // ========================================================================= + RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + Teuchos::FancyOStream& out = *fancy; + out.setOutputToRootOnly(0); + RCP BBt = Xpetra::MatrixMatrix::Multiply(*A_21, false, *A_12, false, out); + RCP BBt_abs = Xpetra::MatrixMatrix::Multiply(*A_21_abs, false, *A_12_abs, false, out); + + SC dropTol = (paramList.get("useFilters") ? paramList.get("tau_1") : 0.00); + RCP filteredA = FilterMatrix(*A_11, *A_11, dropTol); + RCP filteredB = FilterMatrix(*BBt, *BBt_abs, dropTol); + + RCP fA_11_crs = rcp_dynamic_cast(filteredA); + RCP fA_12_crs = Teuchos::null; + RCP fA_21_crs = Teuchos::null; + RCP fA_22_crs = rcp_dynamic_cast(filteredB); + + // Build the large filtered matrix which requires strided maps + std::vector stridingInfo(1, 1); + int stridedBlockId = -1; + + Array elementList(numVel + numPres); // Not RCP ... does this get cleared ? 
+ Array velElem = A_12_crs->getRangeMap()->getLocalElementList(); + Array presElem = A_21_crs->getRangeMap()->getLocalElementList(); + + for (Xpetra::global_size_t i = 0; i < numVel; i++) elementList[i] = velElem[i]; + for (Xpetra::global_size_t i = numVel; i < numVel + numPres; i++) elementList[i] = presElem[i - numVel]; + RCP fullMap = StridedMapFactory::Build(Xpetra::UseTpetra, numVel + numPres, elementList(), indexBase, stridingInfo, comm); + + std::vector > partMaps(2); + partMaps[0] = StridedMapFactory::Build(Xpetra::UseTpetra, numVel, velElem, indexBase, stridingInfo, comm); + partMaps[1] = StridedMapFactory::Build(Xpetra::UseTpetra, numPres, presElem, indexBase, stridingInfo, comm, stridedBlockId, numVel); + + // Map extractors are necessary for Xpetra's block operators + RCP mapExtractor = MapExtractorFactory::Build(fullMap, partMaps); + RCP fA = rcp(new BlockedCrsMatrix(mapExtractor, mapExtractor, 10)); + fA->setMatrix(0, 0, fA_11_crs); + fA->setMatrix(0, 1, fA_12_crs); + fA->setMatrix(1, 0, fA_21_crs); + fA->setMatrix(1, 1, fA_22_crs); + fA->fillComplete(); + + // ------------------------------------------------------------------------- + // Preconditioner construction - I.a (filtered hierarchy) + // ------------------------------------------------------------------------- + MueLu::FactoryManager M; + SetDependencyTree(M, paramList); + + RCP H = rcp(new Hierarchy); + RCP finestLevel = H->GetLevel(0); + finestLevel->Set("A", rcp_dynamic_cast(fA)); + finestLevel->Set("p2vMap", p2vMap); + finestLevel->Set("CoordinatesVelocity", Xpetra::toXpetra(velCoords)); + finestLevel->Set("CoordinatesPressure", Xpetra::toXpetra(presCoords)); + finestLevel->Set("AForPat", A_11_9Pt); + H->SetMaxCoarseSize(MUELU_GPD("coarse: max size", int, 1)); + + // The first invocation of Setup() builds the hierarchy using the filtered + // matrix. This build includes the grid transfers but not the creation of the + // smoothers. 
+ // NOTE: we need to indicate what should be kept from the first invocation + // for the second invocation, which then focuses on building the smoothers + // for the unfiltered matrix. + H->Keep("P", M.GetFactory("P").get()); + H->Keep("R", M.GetFactory("R").get()); + H->Keep("Ptent", M.GetFactory("Ptent").get()); + H->Setup(M, 0, MUELU_GPD("max levels", int, 3)); #if 0 for (int i = 1; i < H->GetNumLevels(); i++) { @@ -491,393 +514,390 @@ namespace Thyra { } #endif - // ------------------------------------------------------------------------- - // Preconditioner construction - I.b (smoothers for unfiltered matrix) - // ------------------------------------------------------------------------- - std::string smootherType = MUELU_GPD("smoother: type", std::string, "vanka"); - ParameterList smootherParams; - if (paramList.isSublist("smoother: params")) - smootherParams = paramList.sublist("smoother: params"); - M.SetFactory("Smoother", GetSmoother(smootherType, smootherParams, false/*coarseSolver?*/)); - - std::string coarseType = MUELU_GPD("coarse: type", std::string, "direct"); - ParameterList coarseParams; - if (paramList.isSublist("coarse: params")) - coarseParams = paramList.sublist("coarse: params"); - M.SetFactory("CoarseSolver", GetSmoother(coarseType, coarseParams, true/*coarseSolver?*/)); + // ------------------------------------------------------------------------- + // Preconditioner construction - I.b (smoothers for unfiltered matrix) + // ------------------------------------------------------------------------- + std::string smootherType = MUELU_GPD("smoother: type", std::string, "vanka"); + ParameterList smootherParams; + if (paramList.isSublist("smoother: params")) + smootherParams = paramList.sublist("smoother: params"); + M.SetFactory("Smoother", GetSmoother(smootherType, smootherParams, false /*coarseSolver?*/)); + + std::string coarseType = MUELU_GPD("coarse: type", std::string, "direct"); + ParameterList coarseParams; + if 
(paramList.isSublist("coarse: params")) + coarseParams = paramList.sublist("coarse: params"); + M.SetFactory("CoarseSolver", GetSmoother(coarseType, coarseParams, true /*coarseSolver?*/)); #ifdef HAVE_MUELU_DEBUG - M.ResetDebugData(); + M.ResetDebugData(); #endif - RCP A = rcp(new BlockedCrsMatrix(mapExtractor, mapExtractor, 10)); - A->setMatrix(0, 0, A_11); - A->setMatrix(0, 1, A_12); - A->setMatrix(1, 0, A_21); - A->setMatrix(1, 1, A_22); - A->fillComplete(); + RCP A = rcp(new BlockedCrsMatrix(mapExtractor, mapExtractor, 10)); + A->setMatrix(0, 0, A_11); + A->setMatrix(0, 1, A_12); + A->setMatrix(1, 0, A_21); + A->setMatrix(1, 1, A_22); + A->fillComplete(); - H->GetLevel(0)->Set("A", rcp_dynamic_cast(A)); + H->GetLevel(0)->Set("A", rcp_dynamic_cast(A)); - H->Setup(M, 0, H->GetNumLevels()); + H->Setup(M, 0, H->GetNumLevels()); - return rcp(new MueLu::TpetraOperator(H)); - } + return rcp(new MueLu::TpetraOperator(H)); +} - template - RCP > - MueLuTpetraQ2Q1PreconditionerFactory:: - FilterMatrix(Xpetra::Matrix& A, Xpetra::Matrix& Pattern, Scalar dropTol) const { - typedef Xpetra::Matrix Matrix; - typedef MueLu::AmalgamationFactory AmalgamationFactory; - typedef MueLu::CoalesceDropFactory CoalesceDropFactory; - typedef MueLu::FilteredAFactory FilteredAFactory; - typedef MueLu::GraphBase GraphBase; - - RCP filteredGraph; - { - // Get graph pattern for the pattern matrix - MueLu::Level level; - level.SetLevelID(1); - - level.Set >("A", rcpFromRef(Pattern)); - - RCP amalgFactory = rcp(new AmalgamationFactory()); - - RCP dropFactory = rcp(new CoalesceDropFactory()); - ParameterList dropParams = *(dropFactory->GetValidParameterList()); - dropParams.set("lightweight wrap", true); - dropParams.set("aggregation: drop scheme", "classical"); - dropParams.set("aggregation: drop tol", dropTol); - // dropParams.set("Dirichlet detection threshold", <>); - dropFactory->SetParameterList(dropParams); - dropFactory->SetFactory("UnAmalgamationInfo", amalgFactory); - - // Build - 
level.Request("Graph", dropFactory.get()); - dropFactory->Build(level); - - level.Get("Graph", filteredGraph, dropFactory.get()); - } - - RCP filteredA; - { - // Filter the original matrix, not the pattern one - MueLu::Level level; - level.SetLevelID(1); - - level.Set("A", rcpFromRef(A)); - level.Set("Graph", filteredGraph); - level.Set("Filtering", true); - - RCP filterFactory = rcp(new FilteredAFactory()); - ParameterList filterParams = *(filterFactory->GetValidParameterList()); - // We need a graph that has proper structure in it. Therefore, we need to - // drop older pattern, i.e. not to reuse it - filterParams.set("filtered matrix: reuse graph", false); - filterParams.set("filtered matrix: use lumping", false); - filterFactory->SetParameterList(filterParams); - - // Build - level.Request("A", filterFactory.get()); - filterFactory->Build(level); - - level.Get("A", filteredA, filterFactory.get()); - } +template +RCP > +MueLuTpetraQ2Q1PreconditionerFactory:: + FilterMatrix(Xpetra::Matrix& A, Xpetra::Matrix& Pattern, Scalar dropTol) const { + typedef Xpetra::Matrix Matrix; + typedef MueLu::AmalgamationFactory AmalgamationFactory; + typedef MueLu::CoalesceDropFactory CoalesceDropFactory; + typedef MueLu::FilteredAFactory FilteredAFactory; + typedef MueLu::GraphBase GraphBase; - // Zero out row sums by fixing the diagonal - filteredA->resumeFill(); - size_t numRows = filteredA->getRowMap()->getLocalNumElements(); - for (size_t i = 0; i < numRows; i++) { - ArrayView inds; - ArrayView vals; - filteredA->getLocalRowView(i, inds, vals); + RCP filteredGraph; + { + // Get graph pattern for the pattern matrix + MueLu::Level level; + level.SetLevelID(1); - size_t nnz = inds.size(); + level.Set >("A", rcpFromRef(Pattern)); - Array valsNew = vals; + RCP amalgFactory = rcp(new AmalgamationFactory()); - LO diagIndex = -1; - SC diag = Teuchos::ScalarTraits::zero(); - for (size_t j = 0; j < nnz; j++) { - diag += vals[j]; - if (inds[j] == Teuchos::as(i)) - diagIndex = j; - } - 
TEUCHOS_TEST_FOR_EXCEPTION(diagIndex == -1, MueLu::Exceptions::RuntimeError, - "No diagonal found"); - if (nnz <= 1) - continue; + RCP dropFactory = rcp(new CoalesceDropFactory()); + ParameterList dropParams = *(dropFactory->GetValidParameterList()); + dropParams.set("lightweight wrap", true); + dropParams.set("aggregation: drop scheme", "classical"); + dropParams.set("aggregation: drop tol", dropTol); + // dropParams.set("Dirichlet detection threshold", <>); + dropFactory->SetParameterList(dropParams); + dropFactory->SetFactory("UnAmalgamationInfo", amalgFactory); - valsNew[diagIndex] -= diag; + // Build + level.Request("Graph", dropFactory.get()); + dropFactory->Build(level); - filteredA->replaceLocalValues(i, inds, valsNew); - } - filteredA->fillComplete(); + level.Get("Graph", filteredGraph, dropFactory.get()); + } - return filteredA; + RCP filteredA; + { + // Filter the original matrix, not the pattern one + MueLu::Level level; + level.SetLevelID(1); + + level.Set("A", rcpFromRef(A)); + level.Set("Graph", filteredGraph); + level.Set("Filtering", true); + + RCP filterFactory = rcp(new FilteredAFactory()); + ParameterList filterParams = *(filterFactory->GetValidParameterList()); + // We need a graph that has proper structure in it. Therefore, we need to + // drop older pattern, i.e. 
not to reuse it + filterParams.set("filtered matrix: reuse graph", false); + filterParams.set("filtered matrix: use lumping", false); + filterFactory->SetParameterList(filterParams); + + // Build + level.Request("A", filterFactory.get()); + filterFactory->Build(level); + + level.Get("A", filteredA, filterFactory.get()); } - template - void - MueLuTpetraQ2Q1PreconditionerFactory:: - SetDependencyTree(MueLu::FactoryManager& M, const ParameterList& paramList) const { - typedef MueLu::BlockedPFactory BlockedPFactory; - typedef MueLu::GenericRFactory GenericRFactory; - typedef MueLu::BlockedRAPFactory BlockedRAPFactory; - typedef MueLu::SmootherFactory SmootherFactory; - typedef MueLu::BlockedDirectSolver BlockedDirectSolver; - typedef MueLu::FactoryManager FactoryManager; - - // Pressure and velocity dependency trees are identical. The only - // difference is that pressure has to go first, so that velocity can use - // some of pressure data - RCP M11 = rcp(new FactoryManager()), M22 = rcp(new FactoryManager()); - M11->SetKokkosRefactor(paramList.get("use kokkos refactor")); - M22->SetKokkosRefactor(paramList.get("use kokkos refactor")); - SetBlockDependencyTree(*M11, 0, 0, "velocity", paramList); - SetBlockDependencyTree(*M22, 1, 1, "pressure", paramList); - - RCP PFact = rcp(new BlockedPFactory()); - ParameterList pParamList = *(PFact->GetValidParameterList()); - pParamList.set("backwards", true); // do pressure first - PFact->SetParameterList(pParamList); - PFact->AddFactoryManager(M11); - PFact->AddFactoryManager(M22); - M.SetFactory("P", PFact); + // Zero out row sums by fixing the diagonal + filteredA->resumeFill(); + size_t numRows = filteredA->getRowMap()->getLocalNumElements(); + for (size_t i = 0; i < numRows; i++) { + ArrayView inds; + ArrayView vals; + filteredA->getLocalRowView(i, inds, vals); + + size_t nnz = inds.size(); - RCP RFact = rcp(new GenericRFactory()); - RFact->SetFactory("P", PFact); - M.SetFactory("R", RFact); + Array valsNew = vals; - RCP 
AcFact = rcp(new BlockedRAPFactory()); - AcFact->SetFactory("R", RFact); - AcFact->SetFactory("P", PFact); - M.SetFactory("A", AcFact); + LO diagIndex = -1; + SC diag = Teuchos::ScalarTraits::zero(); + for (size_t j = 0; j < nnz; j++) { + diag += vals[j]; + if (inds[j] == Teuchos::as(i)) + diagIndex = j; + } + TEUCHOS_TEST_FOR_EXCEPTION(diagIndex == -1, MueLu::Exceptions::RuntimeError, + "No diagonal found"); + if (nnz <= 1) + continue; - // Smoothers will be set later - M.SetFactory("Smoother", Teuchos::null); + valsNew[diagIndex] -= diag; - RCP coarseFact = rcp(new SmootherFactory(rcp(new BlockedDirectSolver()), Teuchos::null)); - // M.SetFactory("CoarseSolver", coarseFact); - M.SetFactory("CoarseSolver", Teuchos::null); + filteredA->replaceLocalValues(i, inds, valsNew); } + filteredA->fillComplete(); + + return filteredA; +} + +template +void MueLuTpetraQ2Q1PreconditionerFactory:: + SetDependencyTree(MueLu::FactoryManager& M, const ParameterList& paramList) const { + typedef MueLu::BlockedPFactory BlockedPFactory; + typedef MueLu::GenericRFactory GenericRFactory; + typedef MueLu::BlockedRAPFactory BlockedRAPFactory; + typedef MueLu::SmootherFactory SmootherFactory; + typedef MueLu::BlockedDirectSolver BlockedDirectSolver; + typedef MueLu::FactoryManager FactoryManager; + + // Pressure and velocity dependency trees are identical. 
The only + // difference is that pressure has to go first, so that velocity can use + // some of pressure data + RCP M11 = rcp(new FactoryManager()), M22 = rcp(new FactoryManager()); + M11->SetKokkosRefactor(paramList.get("use kokkos refactor")); + M22->SetKokkosRefactor(paramList.get("use kokkos refactor")); + SetBlockDependencyTree(*M11, 0, 0, "velocity", paramList); + SetBlockDependencyTree(*M22, 1, 1, "pressure", paramList); + + RCP PFact = rcp(new BlockedPFactory()); + ParameterList pParamList = *(PFact->GetValidParameterList()); + pParamList.set("backwards", true); // do pressure first + PFact->SetParameterList(pParamList); + PFact->AddFactoryManager(M11); + PFact->AddFactoryManager(M22); + M.SetFactory("P", PFact); + + RCP RFact = rcp(new GenericRFactory()); + RFact->SetFactory("P", PFact); + M.SetFactory("R", RFact); + + RCP AcFact = rcp(new BlockedRAPFactory()); + AcFact->SetFactory("R", RFact); + AcFact->SetFactory("P", PFact); + M.SetFactory("A", AcFact); + + // Smoothers will be set later + M.SetFactory("Smoother", Teuchos::null); + + RCP coarseFact = rcp(new SmootherFactory(rcp(new BlockedDirectSolver()), Teuchos::null)); + // M.SetFactory("CoarseSolver", coarseFact); + M.SetFactory("CoarseSolver", Teuchos::null); +} + +template +void MueLuTpetraQ2Q1PreconditionerFactory:: + SetBlockDependencyTree(MueLu::FactoryManager& M, LocalOrdinal row, LocalOrdinal col, const std::string& mode, const ParameterList& paramList) const { + typedef MueLu::ConstraintFactory ConstraintFactory; + typedef MueLu::EminPFactory EminPFactory; + typedef MueLu::GenericRFactory GenericRFactory; + typedef MueLu::PatternFactory PatternFactory; + typedef MueLu::Q2Q1PFactory Q2Q1PFactory; + typedef MueLu::Q2Q1uPFactory Q2Q1uPFactory; + typedef MueLu::SubBlockAFactory SubBlockAFactory; + + RCP AFact = rcp(new SubBlockAFactory()); + AFact->SetFactory("A", MueLu::NoFactory::getRCP()); + AFact->SetParameter("block row", Teuchos::ParameterEntry(row)); + AFact->SetParameter("block col", 
Teuchos::ParameterEntry(col)); + M.SetFactory("A", AFact); + + RCP Q2Q1Fact; + + const bool isStructured = false; + + if (isStructured) { + Q2Q1Fact = rcp(new Q2Q1PFactory); + + } else { + Q2Q1Fact = rcp(new Q2Q1uPFactory); + ParameterList q2q1ParamList = *(Q2Q1Fact->GetValidParameterList()); + q2q1ParamList.set("mode", mode); + if (paramList.isParameter("dump status")) + q2q1ParamList.set("dump status", paramList.get("dump status")); + if (paramList.isParameter("phase2")) + q2q1ParamList.set("phase2", paramList.get("phase2")); + if (paramList.isParameter("tau_2")) + q2q1ParamList.set("tau_2", paramList.get("tau_2")); + Q2Q1Fact->SetParameterList(q2q1ParamList); + } + Q2Q1Fact->SetFactory("A", AFact); + M.SetFactory("Ptent", Q2Q1Fact); + + RCP patternFact = rcp(new PatternFactory); + ParameterList patternParams = *(patternFact->GetValidParameterList()); + // Our prolongator constructs the exact pattern we are going to use, + // therefore we do not expand it + patternParams.set("emin: pattern order", 0); + patternFact->SetParameterList(patternParams); + patternFact->SetFactory("A", AFact); + patternFact->SetFactory("P", Q2Q1Fact); + M.SetFactory("Ppattern", patternFact); + + RCP CFact = rcp(new ConstraintFactory); + CFact->SetFactory("Ppattern", patternFact); + M.SetFactory("Constraint", CFact); + + RCP EminPFact = rcp(new EminPFactory()); + ParameterList eminParams = *(EminPFact->GetValidParameterList()); + if (paramList.isParameter("emin: num iterations")) + eminParams.set("emin: num iterations", paramList.get("emin: num iterations")); + if (mode == "pressure") { + eminParams.set("emin: iterative method", "cg"); + } else { + eminParams.set("emin: iterative method", "gmres"); + if (paramList.isParameter("emin: iterative method")) + eminParams.set("emin: iterative method", paramList.get("emin: iterative method")); + } + EminPFact->SetParameterList(eminParams); + EminPFact->SetFactory("A", AFact); + EminPFact->SetFactory("Constraint", CFact); + 
EminPFact->SetFactory("P", Q2Q1Fact); + M.SetFactory("P", EminPFact); + + if (mode == "velocity" && (!paramList.isParameter("velocity: use transpose") || paramList.get("velocity: use transpose") == false)) { + // Pressure system is symmetric, so it does not matter + // Velocity system may benefit from running emin in restriction mode (with A^T) + RCP RFact = rcp(new GenericRFactory()); + RFact->SetFactory("P", EminPFact); + M.SetFactory("R", RFact); + } +} + +template +RCP +MueLuTpetraQ2Q1PreconditionerFactory:: + GetSmoother(const std::string& type, const ParameterList& paramList, bool coarseSolver) const { + typedef Teuchos::ParameterEntry ParameterEntry; + + typedef MueLu::BlockedDirectSolver BlockedDirectSolver; + typedef MueLu::BraessSarazinSmoother BraessSarazinSmoother; + typedef MueLu::DirectSolver DirectSolver; + typedef MueLu::FactoryManager FactoryManager; + typedef MueLu::SchurComplementFactory SchurComplementFactory; + typedef MueLu::SmootherFactory SmootherFactory; + typedef MueLu::SmootherPrototype SmootherPrototype; + typedef MueLu::TrilinosSmoother TrilinosSmoother; + + RCP smootherPrototype; + if (type == "none") { + return Teuchos::null; + + } else if (type == "vanka") { + // Set up Vanka smoothing via a combination of Schwarz and block relaxation. 
+ ParameterList schwarzList; + schwarzList.set("schwarz: overlap level", as(0)); + schwarzList.set("schwarz: zero starting solution", false); + schwarzList.set("subdomain solver name", "Block_Relaxation"); + + ParameterList& innerSolverList = schwarzList.sublist("subdomain solver parameters"); + innerSolverList.set("partitioner: type", "user"); + innerSolverList.set("partitioner: overlap", MUELU_GPD("partitioner: overlap", int, 1)); + innerSolverList.set("relaxation: type", MUELU_GPD("relaxation: type", std::string, "Gauss-Seidel")); + innerSolverList.set("relaxation: sweeps", MUELU_GPD("relaxation: sweeps", int, 1)); + innerSolverList.set("relaxation: damping factor", MUELU_GPD("relaxation: damping factor", double, 0.5)); + innerSolverList.set("relaxation: zero starting solution", false); + // innerSolverList.set("relaxation: backward mode", MUELU_GPD("relaxation: backward mode", bool, true); NOT SUPPORTED YET + + std::string ifpackType = "SCHWARZ"; + + smootherPrototype = rcp(new TrilinosSmoother(ifpackType, schwarzList)); + + } else if (type == "schwarz") { + std::string ifpackType = "SCHWARZ"; + + smootherPrototype = rcp(new TrilinosSmoother(ifpackType, paramList)); + + } else if (type == "braess-sarazin") { + // Define smoother/solver for BraessSarazin + SC omega = MUELU_GPD("bs: omega", double, 1.0); + bool lumping = MUELU_GPD("bs: lumping", bool, false); + + RCP schurFact = rcp(new SchurComplementFactory()); + schurFact->SetParameter("omega", ParameterEntry(omega)); + schurFact->SetParameter("lumping", ParameterEntry(lumping)); + schurFact->SetFactory("A", MueLu::NoFactory::getRCP()); + + // Schur complement solver + RCP schurSmootherPrototype; + std::string schurSmootherType = (paramList.isParameter("schur smoother: type") ? 
paramList.get("schur smoother: type") : "RELAXATION"); + if (schurSmootherType == "RELAXATION") { + ParameterList schurSmootherParams = paramList.sublist("schur smoother: params"); + // schurSmootherParams.set("relaxation: damping factor", omega); + schurSmootherPrototype = rcp(new TrilinosSmoother(schurSmootherType, schurSmootherParams)); + } else { + schurSmootherPrototype = rcp(new DirectSolver()); + } + schurSmootherPrototype->SetFactory("A", schurFact); - template - void - MueLuTpetraQ2Q1PreconditionerFactory:: - SetBlockDependencyTree(MueLu::FactoryManager& M, LocalOrdinal row, LocalOrdinal col, const std::string& mode, const ParameterList& paramList) const { - typedef MueLu::ConstraintFactory ConstraintFactory; - typedef MueLu::EminPFactory EminPFactory; - typedef MueLu::GenericRFactory GenericRFactory; - typedef MueLu::PatternFactory PatternFactory; - typedef MueLu::Q2Q1PFactory Q2Q1PFactory; - typedef MueLu::Q2Q1uPFactory Q2Q1uPFactory; - typedef MueLu::SubBlockAFactory SubBlockAFactory; + RCP schurSmootherFact = rcp(new SmootherFactory(schurSmootherPrototype)); - RCP AFact = rcp(new SubBlockAFactory()); - AFact->SetFactory ("A", MueLu::NoFactory::getRCP()); - AFact->SetParameter("block row", Teuchos::ParameterEntry(row)); - AFact->SetParameter("block col", Teuchos::ParameterEntry(col)); - M.SetFactory("A", AFact); + // Define temporary FactoryManager that is used as input for BraessSarazin smoother + RCP braessManager = rcp(new FactoryManager()); + braessManager->SetFactory("A", schurFact); // SchurComplement operator for correction step (defined as "A") + braessManager->SetFactory("Smoother", schurSmootherFact); // solver/smoother for correction step + braessManager->SetFactory("PreSmoother", schurSmootherFact); + braessManager->SetFactory("PostSmoother", schurSmootherFact); + braessManager->SetIgnoreUserData(true); // always use data from factories defined in factory manager - RCP Q2Q1Fact; + smootherPrototype = rcp(new BraessSarazinSmoother()); + 
smootherPrototype->SetParameter("Sweeps", ParameterEntry(MUELU_GPD("bs: sweeps", int, 1))); + smootherPrototype->SetParameter("lumping", ParameterEntry(lumping)); + smootherPrototype->SetParameter("Damping factor", ParameterEntry(omega)); + smootherPrototype->SetParameter("q2q1 mode", ParameterEntry(true)); + rcp_dynamic_cast(smootherPrototype)->AddFactoryManager(braessManager, 0); // set temporary factory manager in BraessSarazin smoother - const bool isStructured = false; + } else if (type == "ilu") { + std::string ifpackType = "RILUK"; - if (isStructured) { - Q2Q1Fact = rcp(new Q2Q1PFactory); + smootherPrototype = rcp(new TrilinosSmoother(ifpackType, paramList)); - } else { - Q2Q1Fact = rcp(new Q2Q1uPFactory); - ParameterList q2q1ParamList = *(Q2Q1Fact->GetValidParameterList()); - q2q1ParamList.set("mode", mode); - if (paramList.isParameter("dump status")) - q2q1ParamList.set("dump status", paramList.get("dump status")); - if (paramList.isParameter("phase2")) - q2q1ParamList.set("phase2", paramList.get("phase2")); - if (paramList.isParameter("tau_2")) - q2q1ParamList.set("tau_2", paramList.get("tau_2")); - Q2Q1Fact->SetParameterList(q2q1ParamList); - } - Q2Q1Fact->SetFactory("A", AFact); - M.SetFactory("Ptent", Q2Q1Fact); - - RCP patternFact = rcp(new PatternFactory); - ParameterList patternParams = *(patternFact->GetValidParameterList()); - // Our prolongator constructs the exact pattern we are going to use, - // therefore we do not expand it - patternParams.set("emin: pattern order", 0); - patternFact->SetParameterList(patternParams); - patternFact->SetFactory("A", AFact); - patternFact->SetFactory("P", Q2Q1Fact); - M.SetFactory("Ppattern", patternFact); - - RCP CFact = rcp(new ConstraintFactory); - CFact->SetFactory("Ppattern", patternFact); - M.SetFactory("Constraint", CFact); - - RCP EminPFact = rcp(new EminPFactory()); - ParameterList eminParams = *(EminPFact->GetValidParameterList()); - if (paramList.isParameter("emin: num iterations")) - 
eminParams.set("emin: num iterations", paramList.get("emin: num iterations")); - if (mode == "pressure") { - eminParams.set("emin: iterative method", "cg"); - } else { - eminParams.set("emin: iterative method", "gmres"); - if (paramList.isParameter("emin: iterative method")) - eminParams.set("emin: iterative method", paramList.get("emin: iterative method")); - } - EminPFact->SetParameterList(eminParams); - EminPFact->SetFactory("A", AFact); - EminPFact->SetFactory("Constraint", CFact); - EminPFact->SetFactory("P", Q2Q1Fact); - M.SetFactory("P", EminPFact); - - if (mode == "velocity" && (!paramList.isParameter("velocity: use transpose") || paramList.get("velocity: use transpose") == false)) { - // Pressure system is symmetric, so it does not matter - // Velocity system may benefit from running emin in restriction mode (with A^T) - RCP RFact = rcp(new GenericRFactory()); - RFact->SetFactory("P", EminPFact); - M.SetFactory("R", RFact); - } + } else if (type == "direct") { + smootherPrototype = rcp(new BlockedDirectSolver()); + + } else { + throw MueLu::Exceptions::RuntimeError("Unknown smoother type: \"" + type + "\""); } - template - RCP - MueLuTpetraQ2Q1PreconditionerFactory:: - GetSmoother(const std::string& type, const ParameterList& paramList, bool coarseSolver) const { - typedef Teuchos::ParameterEntry ParameterEntry; - - typedef MueLu::BlockedDirectSolver BlockedDirectSolver; - typedef MueLu::BraessSarazinSmoother BraessSarazinSmoother; - typedef MueLu::DirectSolver DirectSolver; - typedef MueLu::FactoryManager FactoryManager; - typedef MueLu::SchurComplementFactory SchurComplementFactory; - typedef MueLu::SmootherFactory SmootherFactory; - typedef MueLu::SmootherPrototype SmootherPrototype; - typedef MueLu::TrilinosSmoother TrilinosSmoother; - - RCP smootherPrototype; - if (type == "none") { - return Teuchos::null; - - } else if (type == "vanka") { - // Set up Vanka smoothing via a combination of Schwarz and block relaxation. 
- ParameterList schwarzList; - schwarzList.set("schwarz: overlap level", as(0)); - schwarzList.set("schwarz: zero starting solution", false); - schwarzList.set("subdomain solver name", "Block_Relaxation"); - - ParameterList& innerSolverList = schwarzList.sublist("subdomain solver parameters"); - innerSolverList.set("partitioner: type", "user"); - innerSolverList.set("partitioner: overlap", MUELU_GPD("partitioner: overlap", int, 1)); - innerSolverList.set("relaxation: type", MUELU_GPD("relaxation: type", std::string, "Gauss-Seidel")); - innerSolverList.set("relaxation: sweeps", MUELU_GPD("relaxation: sweeps", int, 1)); - innerSolverList.set("relaxation: damping factor", MUELU_GPD("relaxation: damping factor", double, 0.5)); - innerSolverList.set("relaxation: zero starting solution", false); - // innerSolverList.set("relaxation: backward mode", MUELU_GPD("relaxation: backward mode", bool, true); NOT SUPPORTED YET - - std::string ifpackType = "SCHWARZ"; - - smootherPrototype = rcp(new TrilinosSmoother(ifpackType, schwarzList)); - - } else if (type == "schwarz") { - - std::string ifpackType = "SCHWARZ"; - - smootherPrototype = rcp(new TrilinosSmoother(ifpackType, paramList)); - - } else if (type == "braess-sarazin") { - // Define smoother/solver for BraessSarazin - SC omega = MUELU_GPD("bs: omega", double, 1.0); - bool lumping = MUELU_GPD("bs: lumping", bool, false); - - RCP schurFact = rcp(new SchurComplementFactory()); - schurFact->SetParameter("omega", ParameterEntry(omega)); - schurFact->SetParameter("lumping", ParameterEntry(lumping)); - schurFact->SetFactory ("A", MueLu::NoFactory::getRCP()); - - // Schur complement solver - RCP schurSmootherPrototype; - std::string schurSmootherType = (paramList.isParameter("schur smoother: type") ? 
paramList.get("schur smoother: type") : "RELAXATION"); - if (schurSmootherType == "RELAXATION") { - ParameterList schurSmootherParams = paramList.sublist("schur smoother: params"); - // schurSmootherParams.set("relaxation: damping factor", omega); - schurSmootherPrototype = rcp(new TrilinosSmoother(schurSmootherType, schurSmootherParams)); - } else { - schurSmootherPrototype = rcp(new DirectSolver()); - } - schurSmootherPrototype->SetFactory("A", schurFact); - - RCP schurSmootherFact = rcp(new SmootherFactory(schurSmootherPrototype)); - - // Define temporary FactoryManager that is used as input for BraessSarazin smoother - RCP braessManager = rcp(new FactoryManager()); - braessManager->SetFactory("A", schurFact); // SchurComplement operator for correction step (defined as "A") - braessManager->SetFactory("Smoother", schurSmootherFact); // solver/smoother for correction step - braessManager->SetFactory("PreSmoother", schurSmootherFact); - braessManager->SetFactory("PostSmoother", schurSmootherFact); - braessManager->SetIgnoreUserData(true); // always use data from factories defined in factory manager - - smootherPrototype = rcp(new BraessSarazinSmoother()); - smootherPrototype->SetParameter("Sweeps", ParameterEntry(MUELU_GPD("bs: sweeps", int, 1))); - smootherPrototype->SetParameter("lumping", ParameterEntry(lumping)); - smootherPrototype->SetParameter("Damping factor", ParameterEntry(omega)); - smootherPrototype->SetParameter("q2q1 mode", ParameterEntry(true)); - rcp_dynamic_cast(smootherPrototype)->AddFactoryManager(braessManager, 0); // set temporary factory manager in BraessSarazin smoother - - } else if (type == "ilu") { - std::string ifpackType = "RILUK"; - - smootherPrototype = rcp(new TrilinosSmoother(ifpackType, paramList)); - - } else if (type == "direct") { - smootherPrototype = rcp(new BlockedDirectSolver()); + return coarseSolver ? 
rcp(new SmootherFactory(smootherPrototype, Teuchos::null)) : rcp(new SmootherFactory(smootherPrototype)); +} - } else { - throw MueLu::Exceptions::RuntimeError("Unknown smoother type: \"" + type + "\""); - } +template +RCP > +MueLuTpetraQ2Q1PreconditionerFactory::Absolute(const Xpetra::Matrix& A) const { + typedef Xpetra::CrsMatrix CrsMatrix; + typedef Xpetra::CrsMatrixWrap CrsMatrixWrap; + typedef Xpetra::Matrix Matrix; - return coarseSolver ? rcp(new SmootherFactory(smootherPrototype, Teuchos::null)) : rcp(new SmootherFactory(smootherPrototype)); - } + const CrsMatrixWrap& Awrap = dynamic_cast(A); - template - RCP > - MueLuTpetraQ2Q1PreconditionerFactory::Absolute(const Xpetra::Matrix& A) const { - typedef Xpetra::CrsMatrix CrsMatrix; - typedef Xpetra::CrsMatrixWrap CrsMatrixWrap; - typedef Xpetra::Matrix Matrix; - - const CrsMatrixWrap& Awrap = dynamic_cast(A); - - ArrayRCP iaA; - ArrayRCP jaA; - ArrayRCP valA; - Awrap.getCrsMatrix()->getAllValues(iaA, jaA, valA); - - ArrayRCP iaB (iaA .size()); - ArrayRCP jaB (jaA .size()); - ArrayRCP valB(valA.size()); - for (int i = 0; i < iaA .size(); i++) iaB [i] = iaA[i]; - for (int i = 0; i < jaA .size(); i++) jaB [i] = jaA[i]; - for (int i = 0; i < valA.size(); i++) valB[i] = Teuchos::ScalarTraits::magnitude(valA[i]); - - RCP B = rcp(new CrsMatrixWrap(A.getRowMap(), A.getColMap(), 0)); - RCP Bcrs = rcp_dynamic_cast(B)->getCrsMatrix(); - Bcrs->setAllValues(iaB, jaB, valB); - Bcrs->expertStaticFillComplete(A.getDomainMap(), A.getRangeMap()); - - return B; - } + ArrayRCP iaA; + ArrayRCP jaA; + ArrayRCP valA; + Awrap.getCrsMatrix()->getAllValues(iaA, jaA, valA); - // Public functions overridden from Teuchos::Describable - template - std::string MueLuTpetraQ2Q1PreconditionerFactory::description() const { - return "Thyra::MueLuTpetraQ2Q1PreconditionerFactory"; - } + ArrayRCP iaB(iaA.size()); + ArrayRCP jaB(jaA.size()); + ArrayRCP valB(valA.size()); + for (int i = 0; i < iaA.size(); i++) iaB[i] = iaA[i]; + for (int i = 0; i < 
jaA.size(); i++) jaB[i] = jaA[i]; + for (int i = 0; i < valA.size(); i++) valB[i] = Teuchos::ScalarTraits::magnitude(valA[i]); + + RCP B = rcp(new CrsMatrixWrap(A.getRowMap(), A.getColMap(), 0)); + RCP Bcrs = rcp_dynamic_cast(B)->getCrsMatrix(); + Bcrs->setAllValues(iaB, jaB, valB); + Bcrs->expertStaticFillComplete(A.getDomainMap(), A.getRangeMap()); + + return B; +} + +// Public functions overridden from Teuchos::Describable +template +std::string MueLuTpetraQ2Q1PreconditionerFactory::description() const { + return "Thyra::MueLuTpetraQ2Q1PreconditionerFactory"; +} -} // namespace Thyra +} // namespace Thyra #endif -#endif // ifdef THYRA_MUELU_TPETRA_Q2Q1PRECONDITIONER_FACTORY_DEF_HPP +#endif // ifdef THYRA_MUELU_TPETRA_Q2Q1PRECONDITIONER_FACTORY_DEF_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_decl.hpp b/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_decl.hpp index ac0fcb2c3d4e..4ab8c289873b 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_decl.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_decl.hpp @@ -51,23 +51,19 @@ #include "Xpetra_Operator.hpp" #include "Teuchos_ConstNonconstObjectContainer.hpp" - namespace Thyra { - /** \brief Concrete Thyra::LinearOpBase subclass for Xpetra::Operator. * * \todo Move this to Thyra?? * * \ingroup Xpetra_Thyra_Op_Vec_adapters_grp */ -template +template class XpetraLinearOp - : virtual public Thyra::LinearOpDefaultBase -{ -public: - + : virtual public Thyra::LinearOpDefaultBase { + public: /** \name Constructors/initializers. */ //@{ @@ -76,24 +72,22 @@ class XpetraLinearOp /** \brief Initialize. */ void initialize( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP > &xpetraOperator - ); + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP > &xpetraOperator); /** \brief Initialize. 
*/ void constInitialize( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP > &xpetraOperator - ); + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP > &xpetraOperator); /** \brief Get embedded non-const Xpetra::Operator. */ - RCP > + RCP > getXpetraOperator(); /** \brief Get embedded const Xpetra::Operator. */ - RCP > + RCP > getConstXpetraOperator() const; //@} @@ -109,8 +103,7 @@ class XpetraLinearOp //@} -protected: - + protected: /** \name Protected Overridden functions from LinearOpBase. */ //@{ @@ -119,36 +112,31 @@ class XpetraLinearOp /** \brief . */ void applyImpl( - const Thyra::EOpTransp M_trans, - const Thyra::MultiVectorBase &X_in, - const Teuchos::Ptr > &Y_inout, - const Scalar alpha, - const Scalar beta - ) const; + const Thyra::EOpTransp M_trans, + const Thyra::MultiVectorBase &X_in, + const Teuchos::Ptr > &Y_inout, + const Scalar alpha, + const Scalar beta) const; //@} -private: - + private: RCP > - rangeSpace_; + rangeSpace_; RCP > - domainSpace_; + domainSpace_; - Teuchos::ConstNonconstObjectContainer > - xpetraOperator_; + Teuchos::ConstNonconstObjectContainer > + xpetraOperator_; - template + template void initializeImpl( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP &xpetraOperator - ); - + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP &xpetraOperator); }; - /** \brief Nonmmeber constructor for XpetraLinearOp. * * \relates XpetraLinearOp @@ -156,18 +144,15 @@ class XpetraLinearOp template RCP > xpetraLinearOp( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP > &xpetraOperator - ) -{ + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP > &xpetraOperator) { const RCP > op = - Teuchos::rcp(new XpetraLinearOp); + Teuchos::rcp(new XpetraLinearOp); op->initialize(rangeSpace, domainSpace, xpetraOperator); return op; } - /** \brief Nonmmeber constructor for XpetraLinearOp. 
* * \relates XpetraLinearOp @@ -175,17 +160,15 @@ xpetraLinearOp( template RCP > constXpetraLinearOp( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP > &xpetraOperator - ) -{ + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP > &xpetraOperator) { const RCP > op = - Teuchos::rcp(new XpetraLinearOp); + Teuchos::rcp(new XpetraLinearOp); op->constInitialize(rangeSpace, domainSpace, xpetraOperator); return op; } } // namespace Thyra -#endif // THYRA_XPETRA_LINEAR_OP_DECL_HPP +#endif // THYRA_XPETRA_LINEAR_OP_DECL_HPP diff --git a/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_def.hpp index 9a7517e1e280..05f1cf52a364 100644 --- a/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_def.hpp +++ b/packages/muelu/adapters/stratimikos/Thyra_XpetraLinearOp_def.hpp @@ -56,77 +56,58 @@ namespace Thyra { - // Constructors/initializers - template -XpetraLinearOp::XpetraLinearOp() -{} - +XpetraLinearOp::XpetraLinearOp() {} template -void XpetraLinearOp::initialize( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP > &xpetraOperator - ) -{ +void XpetraLinearOp::initialize( + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP > &xpetraOperator) { initializeImpl(rangeSpace, domainSpace, xpetraOperator); } - template -void XpetraLinearOp::constInitialize( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP > &xpetraOperator - ) -{ +void XpetraLinearOp::constInitialize( + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP > &xpetraOperator) { initializeImpl(rangeSpace, domainSpace, xpetraOperator); } - template -RCP > -XpetraLinearOp::getXpetraOperator() -{ +RCP > +XpetraLinearOp::getXpetraOperator() { return xpetraOperator_.getNonconstObj(); } - template -RCP > -XpetraLinearOp::getConstXpetraOperator() const -{ +RCP > +XpetraLinearOp::getConstXpetraOperator() const { return xpetraOperator_; } - // Public 
Overridden functions from LinearOpBase - template RCP > -XpetraLinearOp::range() const -{ +XpetraLinearOp::range() const { return rangeSpace_; } - template RCP > -XpetraLinearOp::domain() const -{ +XpetraLinearOp::domain() const { return domainSpace_; } // Protected Overridden functions from LinearOpBase template -bool XpetraLinearOp::opSupportedImpl( - Thyra::EOpTransp M_trans) const -{ +bool XpetraLinearOp::opSupportedImpl( + Thyra::EOpTransp M_trans) const { if (is_null(xpetraOperator_)) return false; @@ -142,30 +123,27 @@ bool XpetraLinearOp::opSupportedImpl( return xpetraOperator_->hasTransposeApply(); } - template -void XpetraLinearOp::applyImpl( - const Thyra::EOpTransp M_trans, - const Thyra::MultiVectorBase &X_in, - const Teuchos::Ptr > &Y_inout, - const Scalar alpha, - const Scalar beta - ) const -{ - using Teuchos::rcpFromRef; +void XpetraLinearOp::applyImpl( + const Thyra::EOpTransp M_trans, + const Thyra::MultiVectorBase &X_in, + const Teuchos::Ptr > &Y_inout, + const Scalar alpha, + const Scalar beta) const { using Teuchos::rcpFromPtr; + using Teuchos::rcpFromRef; TEUCHOS_TEST_FOR_EXCEPTION(getConstXpetraOperator() == Teuchos::null, MueLu::Exceptions::RuntimeError, "XpetraLinearOp::applyImpl: internal Xpetra::Operator is null."); - RCP< const Teuchos::Comm< int > > comm = getConstXpetraOperator()->getRangeMap()->getComm(); + RCP > comm = getConstXpetraOperator()->getRangeMap()->getComm(); - const RCP > tX_in = - Xpetra::ThyraUtils::toXpetra(rcpFromRef(X_in), comm); - RCP > tY_inout = - Xpetra::ThyraUtils::toXpetra(rcpFromPtr(Y_inout), comm); + const RCP > tX_in = + Xpetra::ThyraUtils::toXpetra(rcpFromRef(X_in), comm); + RCP > tY_inout = + Xpetra::ThyraUtils::toXpetra(rcpFromPtr(Y_inout), comm); Teuchos::ETransp transp; switch (M_trans) { - case NOTRANS: transp = Teuchos::NO_TRANS; break; - case TRANS: transp = Teuchos::TRANS; break; + case NOTRANS: transp = Teuchos::NO_TRANS; break; + case TRANS: transp = Teuchos::TRANS; break; case CONJTRANS: transp 
= Teuchos::CONJ_TRANS; break; default: TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::NotImplemented, "Thyra::XpetraLinearOp::apply. Unknown value for M_trans. Only NOTRANS, TRANS and CONJTRANS are supported."); } @@ -173,25 +151,25 @@ void XpetraLinearOp::applyImpl( xpetraOperator_->apply(*tX_in, *tY_inout, transp, alpha, beta); // check whether Y is a product vector - RCP > rgMapExtractor = Teuchos::null; + RCP > rgMapExtractor = Teuchos::null; Teuchos::Ptr > prodY_inout = Teuchos::ptr_dynamic_cast >(Y_inout); - if(prodY_inout != Teuchos::null) { + if (prodY_inout != Teuchos::null) { // If Y is a product vector we split up the data from tY and merge them // into the product vector. The necessary Xpetra::MapExtractor is extracted // from the fine level operator (not this!) // get underlying fine level operator (BlockedCrsMatrix) // to extract the range MapExtractor - RCP > mueXop = - Teuchos::rcp_dynamic_cast >(xpetraOperator_.getNonconstObj()); + RCP > mueXop = + Teuchos::rcp_dynamic_cast >(xpetraOperator_.getNonconstObj()); - RCP > A = - mueXop->GetHierarchy()->GetLevel(0)->template Get > >("A"); + RCP > A = + mueXop->GetHierarchy()->GetLevel(0)->template Get > >("A"); TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A)); - RCP > bA = - Teuchos::rcp_dynamic_cast >(A); + RCP > bA = + Teuchos::rcp_dynamic_cast >(A); TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(bA)); rgMapExtractor = bA->getRangeMapExtractor(); @@ -199,30 +177,24 @@ void XpetraLinearOp::applyImpl( } } - // private - template -template -void XpetraLinearOp::initializeImpl( - const RCP > &rangeSpace, - const RCP > &domainSpace, - const RCP &xpetraOperator - ) -{ +template +void XpetraLinearOp::initializeImpl( + const RCP > &rangeSpace, + const RCP > &domainSpace, + const RCP &xpetraOperator) { #ifdef THYRA_DEBUG TEUCHOS_ASSERT(nonnull(rangeSpace)); TEUCHOS_ASSERT(nonnull(domainSpace)); TEUCHOS_ASSERT(nonnull(xpetraOperator)); #endif - rangeSpace_ = rangeSpace; - domainSpace_ = domainSpace; + rangeSpace_ = 
rangeSpace; + domainSpace_ = domainSpace; xpetraOperator_ = xpetraOperator; } - -} // namespace Thyra - +} // namespace Thyra #endif // THYRA_XPETRA_LINEAR_OP_HPP diff --git a/packages/muelu/adapters/stratimikos/Xpetra_ThyraLinearOp.hpp b/packages/muelu/adapters/stratimikos/Xpetra_ThyraLinearOp.hpp index 0ffa0e9e9cbc..60d8261c7063 100644 --- a/packages/muelu/adapters/stratimikos/Xpetra_ThyraLinearOp.hpp +++ b/packages/muelu/adapters/stratimikos/Xpetra_ThyraLinearOp.hpp @@ -61,168 +61,164 @@ #include #include - namespace MueLu { - /*! @brief Wraps an existing Thyra::LinearOp as a Xpetra::Operator. -*/ - template - class XpetraThyraLinearOp : public Xpetra::Operator { - protected: - XpetraThyraLinearOp() = default; - public: - - //! @name Constructor/Destructor - //@{ - - //! Constructor - XpetraThyraLinearOp(RCP > A, - RCP params) : A_(A) { - throw Exceptions::RuntimeError("Interface not supported"); - }; - - //! Destructor. - ~XpetraThyraLinearOp() = default; - - //@} - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP > getDomainMap() const { - throw Exceptions::RuntimeError("Interface not supported"); - } - - // //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP > getRangeMap() const { - throw Exceptions::RuntimeError("Interface not supported"); - } - - //! Returns in Y the result of a Xpetra::Operator applied to a Xpetra::MultiVector X. - /*! - \param[in] X - Xpetra::MultiVector of dimension NumVectors to multiply with matrix. - \param[out] Y - Xpetra::MultiVector of dimension NumVectors containing result. - */ - void apply(const Xpetra::MultiVector& X, - Xpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::one()) const { - throw Exceptions::RuntimeError("Interface not supported"); - } - - //! Indicates whether this operator supports applying the adjoint operator. 
- bool hasTransposeApply() const { return false; } - - //! Compute a residual R = B - (*this) * X - void residual(const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > & X, - const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > & B, - Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > & R) const { - throw Exceptions::RuntimeError("Interface not supported"); - } - - - private: - RCP > A_; +/*! @brief Wraps an existing Thyra::LinearOp as a Xpetra::Operator. + */ +template +class XpetraThyraLinearOp : public Xpetra::Operator { + protected: + XpetraThyraLinearOp() = default; + + public: + //! @name Constructor/Destructor + //@{ + + //! Constructor + XpetraThyraLinearOp(RCP> A, + RCP params) + : A_(A) { + throw Exceptions::RuntimeError("Interface not supported"); }; - - // Partial specialization for Scalar == double. - // Allows to avoid issues with Stokhos instantiating Thyra objects. - template - class XpetraThyraLinearOp : public Xpetra::Operator { - - using Scalar = double; - - protected: - XpetraThyraLinearOp() = default; - public: - - //! @name Constructor/Destructor - //@{ - - //! Constructor - XpetraThyraLinearOp(RCP > A, - RCP params) : A_(A) { - // Build Thyra linear algebra objects - RCP > thyraA = Xpetra::ThyraUtils::toThyra(Teuchos::rcp_dynamic_cast>(A)->getCrsMatrix()); - - Stratimikos::LinearSolverBuilder linearSolverBuilder; - typedef Thyra::PreconditionerFactoryBase Base; - typedef Thyra::MueLuPreconditionerFactory ImplMueLu; - linearSolverBuilder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), "MueLu"); - - linearSolverBuilder.setParameterList(params); - - // Build a new "solver factory" according to the previously specified parameter list. 
- // RCP > solverFactory = Thyra::createLinearSolveStrategy(linearSolverBuilder); - - auto precFactory = Thyra::createPreconditioningStrategy(linearSolverBuilder); - auto prec = precFactory->createPrec(); - - precFactory->initializePrec(Thyra::defaultLinearOpSource(thyraA), prec.get(), Thyra::SUPPORT_SOLVE_UNSPECIFIED); - prec_ = prec; - }; - - //! Destructor. - ~XpetraThyraLinearOp() = default; - - //@} - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP > getDomainMap() const { - return A_->getDomainMap(); - } - - // //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP > getRangeMap() const { - return A_->getRangeMap(); - } - - //! Returns in Y the result of a Xpetra::Operator applied to a Xpetra::MultiVector X. - /*! - \param[in] X - Xpetra::MultiVector of dimension NumVectors to multiply with matrix. - \param[out] Y - Xpetra::MultiVector of dimension NumVectors containing result. - */ - void apply(const Xpetra::MultiVector& X, - Xpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::one()) const { - - RCP > rcpX = Teuchos::rcpFromRef(X); - RCP > thyraX = Xpetra::ThyraUtils::toThyraMultiVector(rcpX); - - RCP > rcpY = Teuchos::rcpFromRef(Y); - RCP > thyraY = Teuchos::rcp_const_cast >(Xpetra::ThyraUtils::toThyraMultiVector(rcpY)); - - prec_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraX, thyraY.ptr(), alpha, beta); - Y = *Xpetra::ThyraUtils::toXpetra(thyraY, Y.getMap()->getComm()); - } - - //! Indicates whether this operator supports applying the adjoint operator. - bool hasTransposeApply() const { return false; } - - //! 
Compute a residual R = B - (*this) * X - void residual(const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > & X, - const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > & B, - Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > & R) const { - using STS = Teuchos::ScalarTraits; - R.update(STS::one(),B,STS::zero()); - this->apply (X, R, Teuchos::NO_TRANS, -STS::one(), STS::one()); - } - - - private: - RCP > A_; - RCP > prec_; + //! Destructor. + ~XpetraThyraLinearOp() = default; + + //@} + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP> getDomainMap() const { + throw Exceptions::RuntimeError("Interface not supported"); + } + + // //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP> getRangeMap() const { + throw Exceptions::RuntimeError("Interface not supported"); + } + + //! Returns in Y the result of a Xpetra::Operator applied to a Xpetra::MultiVector X. + /*! + \param[in] X - Xpetra::MultiVector of dimension NumVectors to multiply with matrix. + \param[out] Y - Xpetra::MultiVector of dimension NumVectors containing result. + */ + void apply(const Xpetra::MultiVector& X, + Xpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::one()) const { + throw Exceptions::RuntimeError("Interface not supported"); + } + + //! Indicates whether this operator supports applying the adjoint operator. + bool hasTransposeApply() const { return false; } + + //! Compute a residual R = B - (*this) * X + void residual(const Xpetra::MultiVector& X, + const Xpetra::MultiVector& B, + Xpetra::MultiVector& R) const { + throw Exceptions::RuntimeError("Interface not supported"); + } + + private: + RCP> A_; +}; + +// Partial specialization for Scalar == double. +// Allows to avoid issues with Stokhos instantiating Thyra objects. 
+template +class XpetraThyraLinearOp : public Xpetra::Operator { + using Scalar = double; + + protected: + XpetraThyraLinearOp() = default; + + public: + //! @name Constructor/Destructor + //@{ + + //! Constructor + XpetraThyraLinearOp(RCP> A, + RCP params) + : A_(A) { + // Build Thyra linear algebra objects + RCP> thyraA = Xpetra::ThyraUtils::toThyra(Teuchos::rcp_dynamic_cast>(A)->getCrsMatrix()); + + Stratimikos::LinearSolverBuilder linearSolverBuilder; + typedef Thyra::PreconditionerFactoryBase Base; + typedef Thyra::MueLuPreconditionerFactory ImplMueLu; + linearSolverBuilder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd(), "MueLu"); + + linearSolverBuilder.setParameterList(params); + + // Build a new "solver factory" according to the previously specified parameter list. + // RCP > solverFactory = Thyra::createLinearSolveStrategy(linearSolverBuilder); + + auto precFactory = Thyra::createPreconditioningStrategy(linearSolverBuilder); + auto prec = precFactory->createPrec(); + + precFactory->initializePrec(Thyra::defaultLinearOpSource(thyraA), prec.get(), Thyra::SUPPORT_SOLVE_UNSPECIFIED); + prec_ = prec; }; -} // namespace - -#endif // defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) - -#endif // MUELU_XPETRA_THYRALINEAROP_HPP + //! Destructor. + ~XpetraThyraLinearOp() = default; + + //@} + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP> getDomainMap() const { + return A_->getDomainMap(); + } + + // //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP> getRangeMap() const { + return A_->getRangeMap(); + } + + //! Returns in Y the result of a Xpetra::Operator applied to a Xpetra::MultiVector X. + /*! + \param[in] X - Xpetra::MultiVector of dimension NumVectors to multiply with matrix. + \param[out] Y - Xpetra::MultiVector of dimension NumVectors containing result. 
+ */ + void apply(const Xpetra::MultiVector& X, + Xpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::one()) const { + RCP> rcpX = Teuchos::rcpFromRef(X); + RCP> thyraX = Xpetra::ThyraUtils::toThyraMultiVector(rcpX); + + RCP> rcpY = Teuchos::rcpFromRef(Y); + RCP> thyraY = Teuchos::rcp_const_cast>(Xpetra::ThyraUtils::toThyraMultiVector(rcpY)); + + prec_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraX, thyraY.ptr(), alpha, beta); + Y = *Xpetra::ThyraUtils::toXpetra(thyraY, Y.getMap()->getComm()); + } + + //! Indicates whether this operator supports applying the adjoint operator. + bool hasTransposeApply() const { return false; } + + //! Compute a residual R = B - (*this) * X + void residual(const Xpetra::MultiVector& X, + const Xpetra::MultiVector& B, + Xpetra::MultiVector& R) const { + using STS = Teuchos::ScalarTraits; + R.update(STS::one(), B, STS::zero()); + this->apply(X, R, Teuchos::NO_TRANS, -STS::one(), STS::one()); + } + + private: + RCP> A_; + RCP> prec_; +}; + +} // namespace MueLu + +#endif // defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA) + +#endif // MUELU_XPETRA_THYRALINEAROP_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_CreateTpetraPreconditioner.hpp b/packages/muelu/adapters/tpetra/MueLu_CreateTpetraPreconditioner.hpp index e1fe95d2bdc5..90be6d89c4a5 100644 --- a/packages/muelu/adapters/tpetra/MueLu_CreateTpetraPreconditioner.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_CreateTpetraPreconditioner.hpp @@ -24,7 +24,6 @@ #include #include - #if defined(HAVE_MUELU_AMGX) #include #include @@ -33,165 +32,154 @@ namespace MueLu { - - /*! - @brief Helper function to create a MueLu or AMGX preconditioner that can be used by Tpetra. - @ingroup MueLuAdapters - Given a Tpetra::Operator, this function returns a constructed MueLu preconditioner. 
- @param[in] inA Matrix - @param[in] inParamList Parameter list - */ - template - Teuchos::RCP > - CreateTpetraPreconditioner(const Teuchos::RCP > &inA, - Teuchos::ParameterList& inParamList) - { - typedef Scalar SC; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef Node NO; - - using Teuchos::ParameterList; - - typedef Xpetra::MultiVector MultiVector; - typedef Xpetra::Matrix Matrix; - typedef Hierarchy Hierarchy; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::BlockCrsMatrix block_crs_matrix_type; +/*! + @brief Helper function to create a MueLu or AMGX preconditioner that can be used by Tpetra. + @ingroup MueLuAdapters + Given a Tpetra::Operator, this function returns a constructed MueLu preconditioner. + @param[in] inA Matrix + @param[in] inParamList Parameter list +*/ +template +Teuchos::RCP > +CreateTpetraPreconditioner(const Teuchos::RCP >& inA, + Teuchos::ParameterList& inParamList) { + typedef Scalar SC; + typedef LocalOrdinal LO; + typedef GlobalOrdinal GO; + typedef Node NO; + + using Teuchos::ParameterList; + + typedef Xpetra::MultiVector MultiVector; + typedef Xpetra::Matrix Matrix; + typedef Hierarchy Hierarchy; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::BlockCrsMatrix block_crs_matrix_type; #if defined(HAVE_MUELU_AMGX) - std::string externalMG = "use external multigrid package"; - if (inParamList.isParameter(externalMG) && inParamList.get(externalMG) == "amgx"){ - const RCP constCrsA = rcp_dynamic_cast(inA); - TEUCHOS_TEST_FOR_EXCEPTION(constCrsA == Teuchos::null, Exceptions::RuntimeError, "CreateTpetraPreconditioner: failed to dynamic cast to Tpetra::CrsMatrix, which is required to be able to use AmgX."); - return rcp(new AMGXOperator(constCrsA,inParamList)); - } -#endif - - // Wrap A - RCP A; - RCP bcrsA = rcp_dynamic_cast(inA); - RCP crsA = rcp_dynamic_cast(inA); - if (crsA != Teuchos::null) - A = TpetraCrs_To_XpetraMatrix(crsA); - else if (bcrsA != Teuchos::null) { - RCP > temp = rcp(new 
Xpetra::TpetraBlockCrsMatrix(bcrsA)); - TEUCHOS_TEST_FOR_EXCEPTION(temp==Teuchos::null, Exceptions::RuntimeError, "CreateTpetraPreconditioner: cast from Tpetra::BlockCrsMatrix to Xpetra::TpetraBlockCrsMatrix failed."); - A = rcp(new Xpetra::CrsMatrixWrap(temp)); - } - else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "CreateTpetraPreconditioner: only Tpetra CrsMatrix and BlockCrsMatrix types are supported."); - } - - Teuchos::ParameterList& userList = inParamList.sublist("user data"); - if (userList.isParameter("Coordinates")) { - RCP::coordinateType,LO,GO,NO> > coordinates = Teuchos::null; - try { - coordinates = TpetraMultiVector_To_XpetraMultiVector::coordinateType,LO,GO,NO>(userList.get::coordinateType, LocalOrdinal, GlobalOrdinal, Node> > >("Coordinates")); - } catch(Teuchos::Exceptions::InvalidParameterType&) { - coordinates = userList.get::coordinateType, LocalOrdinal, GlobalOrdinal, Node> > >("Coordinates"); - } - userList.set::coordinateType,LO,GO,NO> > >("Coordinates", coordinates); - } - - if (userList.isParameter("Nullspace")) { - RCP nullspace = Teuchos::null; - try { - nullspace = TpetraMultiVector_To_XpetraMultiVector(userList.get > >("Nullspace")); - } catch(Teuchos::Exceptions::InvalidParameterType&) { - nullspace = userList.get > >("Nullspace"); - } - userList.set >("Nullspace", nullspace); - } - - RCP H = MueLu::CreateXpetraPreconditioner(A, inParamList); - return rcp(new TpetraOperator(H)); + std::string externalMG = "use external multigrid package"; + if (inParamList.isParameter(externalMG) && inParamList.get(externalMG) == "amgx") { + const RCP constCrsA = rcp_dynamic_cast(inA); + TEUCHOS_TEST_FOR_EXCEPTION(constCrsA == Teuchos::null, Exceptions::RuntimeError, "CreateTpetraPreconditioner: failed to dynamic cast to Tpetra::CrsMatrix, which is required to be able to use AmgX."); + return rcp(new AMGXOperator(constCrsA, inParamList)); } +#endif - - /*! 
- @brief Helper function to create a MueLu preconditioner that can be used by Tpetra. - @ingroup MueLuAdapters - - Given a Tpetra::Operator, this function returns a constructed MueLu preconditioner. - - @param[in] inA Matrix - @param[in] xmlFileName XML file containing MueLu options - */ - template - Teuchos::RCP > - CreateTpetraPreconditioner(const Teuchos::RCP >& inA, - const std::string& xmlFileName) - { - Teuchos::ParameterList paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *inA->getDomainMap()->getComm()); - return CreateTpetraPreconditioner(inA, paramList); - } - - - /*! - @brief Helper function to create a MueLu preconditioner that can be used by Tpetra. - @ingroup MueLuAdapters - - Given a Tpetra::Operator, this function returns a constructed MueLu preconditioner. - - @param[in] inA Matrix - */ - template - Teuchos::RCP > - CreateTpetraPreconditioner(const Teuchos::RCP >& inA) - { - Teuchos::ParameterList paramList; - return CreateTpetraPreconditioner(inA, paramList); + // Wrap A + RCP A; + RCP bcrsA = rcp_dynamic_cast(inA); + RCP crsA = rcp_dynamic_cast(inA); + if (crsA != Teuchos::null) + A = TpetraCrs_To_XpetraMatrix(crsA); + else if (bcrsA != Teuchos::null) { + RCP > temp = rcp(new Xpetra::TpetraBlockCrsMatrix(bcrsA)); + TEUCHOS_TEST_FOR_EXCEPTION(temp == Teuchos::null, Exceptions::RuntimeError, "CreateTpetraPreconditioner: cast from Tpetra::BlockCrsMatrix to Xpetra::TpetraBlockCrsMatrix failed."); + A = rcp(new Xpetra::CrsMatrixWrap(temp)); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "CreateTpetraPreconditioner: only Tpetra CrsMatrix and BlockCrsMatrix types are supported."); } - - /*! - @brief Helper function to reuse an existing MueLu preconditioner. - @ingroup MueLuAdapters - - @param[in] inA Matrix - @param[in] Op Existing MueLu preconditioner. 
- */ - template - void ReuseTpetraPreconditioner(const Teuchos::RCP >& inA, - MueLu::TpetraOperator& Op) { - typedef Scalar SC; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef Node NO; - - typedef Xpetra::Matrix Matrix; - typedef MueLu ::Hierarchy Hierarchy; - - RCP H = Op.GetHierarchy(); - RCP A = TpetraCrs_To_XpetraMatrix(inA); - - MueLu::ReuseXpetraPreconditioner(A, H); + Teuchos::ParameterList& userList = inParamList.sublist("user data"); + if (userList.isParameter("Coordinates")) { + RCP::coordinateType, LO, GO, NO> > coordinates = Teuchos::null; + try { + coordinates = TpetraMultiVector_To_XpetraMultiVector::coordinateType, LO, GO, NO>(userList.get::coordinateType, LocalOrdinal, GlobalOrdinal, Node> > >("Coordinates")); + } catch (Teuchos::Exceptions::InvalidParameterType&) { + coordinates = userList.get::coordinateType, LocalOrdinal, GlobalOrdinal, Node> > >("Coordinates"); + } + userList.set::coordinateType, LO, GO, NO> > >("Coordinates", coordinates); } - template - void ReuseTpetraPreconditioner(const Teuchos::RCP >& inA, - MueLu::TpetraOperator& Op) { - typedef Scalar SC; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef Node NO; - - typedef Xpetra::Matrix Matrix; - typedef MueLu ::Hierarchy Hierarchy; - - RCP H = Op.GetHierarchy(); - RCP > temp = rcp(new Xpetra::TpetraBlockCrsMatrix(inA)); - TEUCHOS_TEST_FOR_EXCEPTION(temp==Teuchos::null, Exceptions::RuntimeError, "ReuseTpetraPreconditioner: cast from Tpetra::BlockCrsMatrix to Xpetra::TpetraBlockCrsMatrix failed."); - RCP A = rcp(new Xpetra::CrsMatrixWrap(temp)); - - MueLu::ReuseXpetraPreconditioner(A, H); + if (userList.isParameter("Nullspace")) { + RCP nullspace = Teuchos::null; + try { + nullspace = TpetraMultiVector_To_XpetraMultiVector(userList.get > >("Nullspace")); + } catch (Teuchos::Exceptions::InvalidParameterType&) { + nullspace = userList.get > >("Nullspace"); + } + userList.set >("Nullspace", nullspace); } - - -} //namespace - -#endif //ifndef 
MUELU_CREATE_TPETRA_PRECONDITIONER_HPP - + RCP H = MueLu::CreateXpetraPreconditioner(A, inParamList); + return rcp(new TpetraOperator(H)); +} + +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Tpetra. + @ingroup MueLuAdapters + + Given a Tpetra::Operator, this function returns a constructed MueLu preconditioner. + + @param[in] inA Matrix + @param[in] xmlFileName XML file containing MueLu options +*/ +template +Teuchos::RCP > +CreateTpetraPreconditioner(const Teuchos::RCP >& inA, + const std::string& xmlFileName) { + Teuchos::ParameterList paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *inA->getDomainMap()->getComm()); + return CreateTpetraPreconditioner(inA, paramList); +} + +/*! + @brief Helper function to create a MueLu preconditioner that can be used by Tpetra. + @ingroup MueLuAdapters + + Given a Tpetra::Operator, this function returns a constructed MueLu preconditioner. + + @param[in] inA Matrix +*/ +template +Teuchos::RCP > +CreateTpetraPreconditioner(const Teuchos::RCP >& inA) { + Teuchos::ParameterList paramList; + return CreateTpetraPreconditioner(inA, paramList); +} + +/*! + @brief Helper function to reuse an existing MueLu preconditioner. + @ingroup MueLuAdapters + + @param[in] inA Matrix + @param[in] Op Existing MueLu preconditioner. 
+*/ +template +void ReuseTpetraPreconditioner(const Teuchos::RCP >& inA, + MueLu::TpetraOperator& Op) { + typedef Scalar SC; + typedef LocalOrdinal LO; + typedef GlobalOrdinal GO; + typedef Node NO; + + typedef Xpetra::Matrix Matrix; + typedef MueLu ::Hierarchy Hierarchy; + + RCP H = Op.GetHierarchy(); + RCP A = TpetraCrs_To_XpetraMatrix(inA); + + MueLu::ReuseXpetraPreconditioner(A, H); +} + +template +void ReuseTpetraPreconditioner(const Teuchos::RCP >& inA, + MueLu::TpetraOperator& Op) { + typedef Scalar SC; + typedef LocalOrdinal LO; + typedef GlobalOrdinal GO; + typedef Node NO; + + typedef Xpetra::Matrix Matrix; + typedef MueLu ::Hierarchy Hierarchy; + + RCP H = Op.GetHierarchy(); + RCP > temp = rcp(new Xpetra::TpetraBlockCrsMatrix(inA)); + TEUCHOS_TEST_FOR_EXCEPTION(temp == Teuchos::null, Exceptions::RuntimeError, "ReuseTpetraPreconditioner: cast from Tpetra::BlockCrsMatrix to Xpetra::TpetraBlockCrsMatrix failed."); + RCP A = rcp(new Xpetra::CrsMatrixWrap(temp)); + + MueLu::ReuseXpetraPreconditioner(A, H); +} + +} // namespace MueLu + +#endif // ifndef MUELU_CREATE_TPETRA_PRECONDITIONER_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_decl.hpp b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_decl.hpp index dfd15d15f89b..2f7f822cf614 100644 --- a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_decl.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_decl.hpp @@ -58,106 +58,105 @@ namespace MueLu { - /*! @brief Wraps an existing MueLu::Hierarchy as a Tpetra::Operator, with an optional two-level correction. - Intended to be used with MueLu::ShiftedLaplacian. +/*! @brief Wraps an existing MueLu::Hierarchy as a Tpetra::Operator, with an optional two-level correction. + Intended to be used with MueLu::ShiftedLaplacian. 
+*/ +template ::scalar_type, + class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, + class Node = typename Tpetra::Operator::node_type> +class ShiftedLaplacianOperator + : public Tpetra::Operator { + typedef Xpetra::Matrix Matrix; + typedef Tpetra::CrsMatrix CrsMatrix; + typedef Tpetra::MultiVector MV; + typedef Tpetra::Operator OP; + typedef MueLu::Utilities MUtils; + + public: + //! @name Constructor/Destructor + //@{ + + //! Constructor + ShiftedLaplacianOperator(const RCP >& H) + : Hierarchy_(H) + , option_(0) {} + + //! Auxiliary Constructor + ShiftedLaplacianOperator(const RCP >& H, + const RCP A, int cycles, int iters, int option, double tol) + : Hierarchy_(H) + , A_(A) + , cycles_(cycles) + , iters_(iters) + , option_(option) + , tol_(tol) { + // setup 2-level correction + /*RCP< MueLu::Level > Level1 = H -> GetLevel(1); + R_ = Level1 -> Get< RCP >("R"); + P_ = Level1 -> Get< RCP >("P"); + //RCP AP = Level1 -> Get< RCP >("AP graph"); + RCP AP; + AP = MUtils::Multiply(*A_, false, *P_, false, AP); + // Optimization storage option. If matrix is not changing later, allow this. + bool doOptimizedStorage = true; + // Reuse coarse matrix memory if available (multiple solve) + //RCP Ac = Level1 -> Get< RCP >("RAP graph"); + RCP Ac; + Ac = MUtils::Multiply(*R_, false, *AP, false, Ac, true, doOptimizedStorage); + Ac_ = MUtils::Op2NonConstTpetraCrs(Ac); + + // Setup Belos for two-level correction + BelosList_ = rcp( new Teuchos::ParameterList("GMRES") ); + BelosList_ -> set("Maximum Iterations", iters_ ); + BelosList_ -> set("Convergence Tolerance", tol_ ); + BelosLP_ = rcp( new Belos::LinearProblem ); + BelosLP_ -> setOperator ( Ac_ ); + BelosSM_ = rcp( new Belos::BlockGmresSolMgr(BelosLP_, BelosList_) );*/ + } + + //! Destructor. + virtual ~ShiftedLaplacianOperator() {} + + //@} + + //! Returns the Tpetra::Map object associated with the domain of this operator. 
+ Teuchos::RCP > getDomainMap() const; + + //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP > getRangeMap() const; + + //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. + /*! + \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. + \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. + */ - template ::scalar_type, - class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, - class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, - class Node = typename Tpetra::Operator::node_type> - class ShiftedLaplacianOperator - : public Tpetra::Operator - { - - typedef Xpetra::Matrix Matrix; - typedef Tpetra::CrsMatrix CrsMatrix; - typedef Tpetra::MultiVector MV; - typedef Tpetra::Operator OP; - typedef MueLu::Utilities MUtils; - public: - - //! @name Constructor/Destructor - //@{ - - //! Constructor - ShiftedLaplacianOperator(const RCP > & H) : Hierarchy_(H), option_(0) { } - - //! Auxiliary Constructor - ShiftedLaplacianOperator(const RCP > & H, - const RCP A, int cycles, int iters, int option, double tol) : Hierarchy_(H), A_(A), cycles_(cycles), iters_(iters), option_(option), tol_(tol) - { - - // setup 2-level correction - /*RCP< MueLu::Level > Level1 = H -> GetLevel(1); - R_ = Level1 -> Get< RCP >("R"); - P_ = Level1 -> Get< RCP >("P"); - //RCP AP = Level1 -> Get< RCP >("AP graph"); - RCP AP; - AP = MUtils::Multiply(*A_, false, *P_, false, AP); - // Optimization storage option. If matrix is not changing later, allow this. 
- bool doOptimizedStorage = true; - // Reuse coarse matrix memory if available (multiple solve) - //RCP Ac = Level1 -> Get< RCP >("RAP graph"); - RCP Ac; - Ac = MUtils::Multiply(*R_, false, *AP, false, Ac, true, doOptimizedStorage); - Ac_ = MUtils::Op2NonConstTpetraCrs(Ac); - - // Setup Belos for two-level correction - BelosList_ = rcp( new Teuchos::ParameterList("GMRES") ); - BelosList_ -> set("Maximum Iterations", iters_ ); - BelosList_ -> set("Convergence Tolerance", tol_ ); - BelosLP_ = rcp( new Belos::LinearProblem ); - BelosLP_ -> setOperator ( Ac_ ); - BelosSM_ = rcp( new Belos::BlockGmresSolMgr(BelosLP_, BelosList_) );*/ - - } - - - - //! Destructor. - virtual ~ShiftedLaplacianOperator() { } - - //@} - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP > getDomainMap() const; - - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP > getRangeMap() const; - - //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. - /*! - \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. - \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. - - */ - void apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::one()) const; - - //! Indicates whether this operator supports applying the adjoint operator. 
- bool hasTransposeApply() const; - - private: - - RCP > Hierarchy_; - RCP< Xpetra::Matrix > R_, P_, A_; - RCP< Tpetra::CrsMatrix > Ac_; - RCP< Teuchos::ParameterList > BelosList_; - - //RCP< Belos::LinearProblem > BelosLP_; - //RCP< Belos::SolverManager > BelosSM_; - - // cycles -> number of V-cycles - // iters -> number of GMRES iterations per correction - // option -> 0 if no correction is desired - int cycles_, iters_, option_; - double tol_; - - }; - -} // namespace - -#endif // MUELU_SHIFTEDLAPLACIANOPERATOR_DECL_HPP + void apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::one()) const; + + //! Indicates whether this operator supports applying the adjoint operator. + bool hasTransposeApply() const; + + private: + RCP > Hierarchy_; + RCP > R_, P_, A_; + RCP > Ac_; + RCP BelosList_; + + // RCP< Belos::LinearProblem > BelosLP_; + // RCP< Belos::SolverManager > BelosSM_; + + // cycles -> number of V-cycles + // iters -> number of GMRES iterations per correction + // option -> 0 if no correction is desired + int cycles_, iters_, option_; + double tol_; +}; + +} // namespace MueLu + +#endif // MUELU_SHIFTEDLAPLACIANOPERATOR_DECL_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp index 69512d6e8f45..fce9b5f4ec8b 100644 --- a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp @@ -59,78 +59,74 @@ #include "MueLu_Hierarchy.hpp" #include "MueLu_Utilities.hpp" - namespace MueLu { // ------------- getDomainMap ----------------------- template -Teuchos::RCP > -ShiftedLaplacianOperator:: -getDomainMap () const -{ +Teuchos::RCP > +ShiftedLaplacianOperator:: + getDomainMap() const { typedef Xpetra::Matrix XMatrix; - RCP L0 = 
Hierarchy_->GetLevel (0); - RCP A = L0->Get > ("A"); + RCP L0 = Hierarchy_->GetLevel(0); + RCP A = L0->Get >("A"); RCP > tpbA = - Teuchos::rcp_dynamic_cast >(A); + Teuchos::rcp_dynamic_cast >(A); if (tpbA != Teuchos::null) { - return Xpetra::toTpetraNonZero (tpbA->getDomainMap ()); + return Xpetra::toTpetraNonZero(tpbA->getDomainMap()); } RCP > tpA = - Utilities::Op2NonConstTpetraCrs (A); - return tpA->getDomainMap (); + Utilities::Op2NonConstTpetraCrs(A); + return tpA->getDomainMap(); } // ------------- getRangeMap ----------------------- template -Teuchos::RCP > -ShiftedLaplacianOperator:: -getRangeMap () const -{ +Teuchos::RCP > +ShiftedLaplacianOperator:: + getRangeMap() const { typedef Xpetra::Matrix XMatrix; - RCP L0 = Hierarchy_->GetLevel(0); - RCP A = L0->Get< RCP >("A"); + RCP L0 = Hierarchy_->GetLevel(0); + RCP A = L0->Get >("A"); RCP > tpbA = - Teuchos::rcp_dynamic_cast >(A); - if(tpbA != Teuchos::null) + Teuchos::rcp_dynamic_cast >(A); + if (tpbA != Teuchos::null) return Xpetra::toTpetraNonZero(tpbA->getRangeMap()); - RCP< Tpetra::CrsMatrix > tpA = - Utilities::Op2NonConstTpetraCrs(A); + RCP > tpA = + Utilities::Op2NonConstTpetraCrs(A); return tpA->getRangeMap(); } // ------------- apply ----------------------- template -void ShiftedLaplacianOperator::apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp /* mode */, Scalar /* alpha */, Scalar /* beta */) const { - - typedef Tpetra::MultiVector TMV; - typedef Xpetra::TpetraMultiVector XTMV; +void ShiftedLaplacianOperator::apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp /* mode */, Scalar /* alpha */, Scalar /* beta */) const { + typedef Tpetra::MultiVector TMV; + typedef Xpetra::TpetraMultiVector XTMV; // typedef Xpetra::MultiVector XMV; // unused - TMV & temp_x = const_cast(X); + TMV& temp_x = const_cast(X); const XTMV tX(rcpFromRef(temp_x)); - XTMV tY(rcpFromRef(Y)); + XTMV tY(rcpFromRef(Y)); try { tY.putScalar(0.0); Hierarchy_->Iterate(tX, 
tY, cycles_, true); } - catch(std::exception& e) { - //FIXME add message and rethrow + catch (std::exception& e) { + // FIXME add message and rethrow std::cerr << "Caught an exception in MueLu::ShiftedLaplacianOperator::ApplyInverse():" << std::endl - << e.what() << std::endl; + << e.what() << std::endl; } // update solution with 2-grid error correction @@ -160,15 +156,14 @@ void ShiftedLaplacianOperator::apply(con std::cerr << "Caught an exception in MueLu::ShiftedLaplacianOperator::ApplyInverse():" << std::endl << e.what() << std::endl; }*/ - } // ------------- apply ----------------------- template -bool ShiftedLaplacianOperator::hasTransposeApply() const { +bool ShiftedLaplacianOperator::hasTransposeApply() const { return false; } -} // namespace +} // namespace MueLu -#endif //ifdef MUELU_SHIFTEDLAPLACIANOPERATOR_DEF_HPP +#endif // ifdef MUELU_SHIFTEDLAPLACIANOPERATOR_DEF_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_decl.hpp b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_decl.hpp index 0a4edf9c5a1b..c35bd00f661d 100644 --- a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_decl.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_decl.hpp @@ -91,226 +91,221 @@ namespace MueLu { - /*! - @brief Shifted Laplacian Helmholtz solver - - This class provides a black box solver for indefinite Helmholtz problems. - An AMG-Shifted Laplacian is used as a preconditioner for Krylov iterative - solvers in Belos. - - @ingroup MueLuAdapters - */ - template - class ShiftedLaplacian : public BaseClass { +/*! + @brief Shifted Laplacian Helmholtz solver + + This class provides a black box solver for indefinite Helmholtz problems. + An AMG-Shifted Laplacian is used as a preconditioner for Krylov iterative + solvers in Belos. 
+ + @ingroup MueLuAdapters +*/ +template +class ShiftedLaplacian : public BaseClass { #undef MUELU_SHIFTEDLAPLACIAN_SHORT #include "MueLu_UseShortNames.hpp" - typedef Tpetra::Vector TVEC; - typedef Tpetra::MultiVector TMV; - typedef Tpetra::Operator OP; + typedef Tpetra::Vector TVEC; + typedef Tpetra::MultiVector TMV; + typedef Tpetra::Operator OP; #ifdef HAVE_MUELU_TPETRA_INST_INT_INT - typedef Belos::LinearProblem LinearProblem; - typedef Belos::SolverManager SolverManager; - typedef Belos::SolverFactory SolverFactory; + typedef Belos::LinearProblem LinearProblem; + typedef Belos::SolverManager SolverManager; + typedef Belos::SolverFactory SolverFactory; #endif - public: - + public: /* FIXME 26-June-2015 JJH: This contructor is setting numerous defaults. However, they don't match the defaults FIXME int the method setParameters(). There also isn't any parameter validation that I can see. */ - //! Constructors - ShiftedLaplacian(): - numPDEs_(1), - Smoother_("schwarz"), - Aggregation_("uncoupled"), - Nullspace_("constant"), - numLevels_(5), - coarseGridSize_(100), - omega_(2.0*M_PI), - iters_(500), - blksize_(1), - tol_(1.0e-4), - nsweeps_(5), - ncycles_(1), - cycles_(8), - subiters_(10), - option_(1), - nproblems_(0), - solverType_(1), - restart_size_(100), - recycle_size_(25), - smoother_sweeps_(4), - smoother_damping_((SC)1.0), - krylov_type_(1), - krylov_iterations_(5), - krylov_preconditioner_(1), - ilu_leveloffill_(5.0), - ilu_abs_thresh_(0.0), - ilu_rel_thresh_(1.0), - ilu_diagpivotthresh_(0.1), - ilu_drop_tol_(0.01), - ilu_fill_tol_(0.01), - ilu_relax_val_(1.0), - ilu_rowperm_("LargeDiag"), - ilu_colperm_("COLAMD"), - ilu_drop_rule_("DROP_BASIC"), - ilu_normtype_("INF_NORM"), - ilu_milutype_("SILU"), - schwarz_overlap_(0), - schwarz_usereorder_(true), - schwarz_combinemode_(Tpetra::ADD), - schwarz_ordermethod_("rcm"), - GridTransfersExist_(false), - isSymmetric_(true) - { } - - // Destructor - virtual ~ShiftedLaplacian(); - - // Parameters - void 
setParameters(Teuchos::RCP< Teuchos::ParameterList > paramList); - - // Set matrices - void setProblemMatrix(RCP& A); - void setProblemMatrix(RCP< Tpetra::CrsMatrix >& TpetraA); - void setPreconditioningMatrix(RCP& P); - void setPreconditioningMatrix(RCP< Tpetra::CrsMatrix >& TpetraP); - void setstiff(RCP& K); - void setstiff(RCP< Tpetra::CrsMatrix >& TpetraK); - void setmass(RCP& M); - void setmass(RCP< Tpetra::CrsMatrix >& TpetraM); - void setcoords(RCP& Coords); - void setNullSpace(RCP NullSpace); - void setLevelShifts(std::vector levelshifts); - - // initialize: set parameters and factories, construct - // prolongation and restriction matrices - void initialize(); - // setupFastRAP: setup hierarchy with - // prolongators of the stiffness matrix - // constant complex shifts - void setupFastRAP(); - // setupSlowRAP: setup hierarchy with - // prolongators of the stiffness matrix - // variable complex shifts - void setupSlowRAP(); - // setupNormalRAP: setup hierarchy with - // prolongators of the preconditioning matrix - void setupNormalRAP(); - // setupSolver: initialize Belos solver - void setupSolver(); - // resetLinearProblem: for multiple frequencies; - // reset the Belos operator if the frequency changes - void resetLinearProblem(); - - - // Solve phase - int solve(const RCP B, RCP& X); - void multigrid_apply(const RCP B, - RCP& X); - void multigrid_apply(const RCP > B, - RCP >& X); - int GetIterations(); - typename Teuchos::ScalarTraits::magnitudeType GetResidual(); - - RCP Manager_; - - private: - - // Problem options - // numPDEs_ -> number of DOFs at each node - int numPDEs_; - - // Multigrid options - // numLevels_ -> number of Multigrid levels - // coarseGridSize_ -> size of coarsest grid (if current level has less DOFs, stop coarsening) - std::string Smoother_, Aggregation_, Nullspace_; - int numLevels_, coarseGridSize_; - - // Shifted Laplacian/Helmholtz parameters - double omega_; - std::vector levelshifts_; - - // Krylov solver inputs - // iters -> 
max number of iterations - // tol -> residual tolerance - int iters_, blksize_; - double tol_; - int nsweeps_, ncycles_; - int cycles_, subiters_, option_, nproblems_, solverType_; - int restart_size_, recycle_size_; - - // Smoother parameters - int smoother_sweeps_; - Scalar smoother_damping_; - int krylov_type_; - int krylov_iterations_; - int krylov_preconditioner_; - double ilu_leveloffill_, ilu_abs_thresh_, ilu_rel_thresh_, ilu_diagpivotthresh_; - double ilu_drop_tol_, ilu_fill_tol_, ilu_relax_val_; - std::string ilu_rowperm_, ilu_colperm_, ilu_drop_rule_, ilu_normtype_, ilu_milutype_; - int schwarz_overlap_; - bool schwarz_usereorder_; - Tpetra::CombineMode schwarz_combinemode_; - std::string schwarz_ordermethod_; - - // flags for setup - bool GridTransfersExist_; - bool isSymmetric_; - - // Xpetra matrices - // K_ -> stiffness matrix - // M_ -> mass matrix - // A_ -> Problem matrix - // P_ -> Preconditioning matrix - RCP K_, M_, A_, P_; - RCP Coords_, NullSpace_; - - // Multigrid Hierarchy - RCP Hierarchy_; - - // Factories and prototypes - RCP TentPfact_; - RCP Pfact_; - RCP PgPfact_; - RCP TransPfact_; - RCP Rfact_; - RCP Acfact_; - RCP Acshift_; - RCP Amalgfact_; - RCP Dropfact_; - RCP UCaggfact_; - RCP CoarseMapfact_; - RCP smooProto_, coarsestSmooProto_; - RCP smooFact_, coarsestSmooFact_; - Teuchos::ParameterList coarsestSmooList_; - std::string precType_; - Teuchos::ParameterList precList_; - - // Operator and Preconditioner - RCP< MueLu::ShiftedLaplacianOperator > MueLuOp_; - RCP< Tpetra::CrsMatrix > TpetraA_; + //! 
Constructors + ShiftedLaplacian() + : numPDEs_(1) + , Smoother_("schwarz") + , Aggregation_("uncoupled") + , Nullspace_("constant") + , numLevels_(5) + , coarseGridSize_(100) + , omega_(2.0 * M_PI) + , iters_(500) + , blksize_(1) + , tol_(1.0e-4) + , nsweeps_(5) + , ncycles_(1) + , cycles_(8) + , subiters_(10) + , option_(1) + , nproblems_(0) + , solverType_(1) + , restart_size_(100) + , recycle_size_(25) + , smoother_sweeps_(4) + , smoother_damping_((SC)1.0) + , krylov_type_(1) + , krylov_iterations_(5) + , krylov_preconditioner_(1) + , ilu_leveloffill_(5.0) + , ilu_abs_thresh_(0.0) + , ilu_rel_thresh_(1.0) + , ilu_diagpivotthresh_(0.1) + , ilu_drop_tol_(0.01) + , ilu_fill_tol_(0.01) + , ilu_relax_val_(1.0) + , ilu_rowperm_("LargeDiag") + , ilu_colperm_("COLAMD") + , ilu_drop_rule_("DROP_BASIC") + , ilu_normtype_("INF_NORM") + , ilu_milutype_("SILU") + , schwarz_overlap_(0) + , schwarz_usereorder_(true) + , schwarz_combinemode_(Tpetra::ADD) + , schwarz_ordermethod_("rcm") + , GridTransfersExist_(false) + , isSymmetric_(true) {} + + // Destructor + virtual ~ShiftedLaplacian(); + + // Parameters + void setParameters(Teuchos::RCP paramList); + + // Set matrices + void setProblemMatrix(RCP& A); + void setProblemMatrix(RCP >& TpetraA); + void setPreconditioningMatrix(RCP& P); + void setPreconditioningMatrix(RCP >& TpetraP); + void setstiff(RCP& K); + void setstiff(RCP >& TpetraK); + void setmass(RCP& M); + void setmass(RCP >& TpetraM); + void setcoords(RCP& Coords); + void setNullSpace(RCP NullSpace); + void setLevelShifts(std::vector levelshifts); + + // initialize: set parameters and factories, construct + // prolongation and restriction matrices + void initialize(); + // setupFastRAP: setup hierarchy with + // prolongators of the stiffness matrix + // constant complex shifts + void setupFastRAP(); + // setupSlowRAP: setup hierarchy with + // prolongators of the stiffness matrix + // variable complex shifts + void setupSlowRAP(); + // setupNormalRAP: setup hierarchy 
with + // prolongators of the preconditioning matrix + void setupNormalRAP(); + // setupSolver: initialize Belos solver + void setupSolver(); + // resetLinearProblem: for multiple frequencies; + // reset the Belos operator if the frequency changes + void resetLinearProblem(); + + // Solve phase + int solve(const RCP B, RCP& X); + void multigrid_apply(const RCP B, + RCP& X); + void multigrid_apply(const RCP > B, + RCP >& X); + int GetIterations(); + typename Teuchos::ScalarTraits::magnitudeType GetResidual(); + + RCP Manager_; + + private: + // Problem options + // numPDEs_ -> number of DOFs at each node + int numPDEs_; + + // Multigrid options + // numLevels_ -> number of Multigrid levels + // coarseGridSize_ -> size of coarsest grid (if current level has less DOFs, stop coarsening) + std::string Smoother_, Aggregation_, Nullspace_; + int numLevels_, coarseGridSize_; + + // Shifted Laplacian/Helmholtz parameters + double omega_; + std::vector levelshifts_; + + // Krylov solver inputs + // iters -> max number of iterations + // tol -> residual tolerance + int iters_, blksize_; + double tol_; + int nsweeps_, ncycles_; + int cycles_, subiters_, option_, nproblems_, solverType_; + int restart_size_, recycle_size_; + + // Smoother parameters + int smoother_sweeps_; + Scalar smoother_damping_; + int krylov_type_; + int krylov_iterations_; + int krylov_preconditioner_; + double ilu_leveloffill_, ilu_abs_thresh_, ilu_rel_thresh_, ilu_diagpivotthresh_; + double ilu_drop_tol_, ilu_fill_tol_, ilu_relax_val_; + std::string ilu_rowperm_, ilu_colperm_, ilu_drop_rule_, ilu_normtype_, ilu_milutype_; + int schwarz_overlap_; + bool schwarz_usereorder_; + Tpetra::CombineMode schwarz_combinemode_; + std::string schwarz_ordermethod_; + + // flags for setup + bool GridTransfersExist_; + bool isSymmetric_; + + // Xpetra matrices + // K_ -> stiffness matrix + // M_ -> mass matrix + // A_ -> Problem matrix + // P_ -> Preconditioning matrix + RCP K_, M_, A_, P_; + RCP Coords_, NullSpace_; + 
+ // Multigrid Hierarchy + RCP Hierarchy_; + + // Factories and prototypes + RCP TentPfact_; + RCP Pfact_; + RCP PgPfact_; + RCP TransPfact_; + RCP Rfact_; + RCP Acfact_; + RCP Acshift_; + RCP Amalgfact_; + RCP Dropfact_; + RCP UCaggfact_; + RCP CoarseMapfact_; + RCP smooProto_, coarsestSmooProto_; + RCP smooFact_, coarsestSmooFact_; + Teuchos::ParameterList coarsestSmooList_; + std::string precType_; + Teuchos::ParameterList precList_; + + // Operator and Preconditioner + RCP > MueLuOp_; + RCP > TpetraA_; #ifdef HAVE_MUELU_TPETRA_INST_INT_INT - // Belos Linear Problem and Solver - RCP LinearProblem_; - RCP SolverManager_; - RCP SolverFactory_; - RCP BelosList_; + // Belos Linear Problem and Solver + RCP LinearProblem_; + RCP SolverManager_; + RCP SolverFactory_; + RCP BelosList_; #endif +}; - }; - -} +} // namespace MueLu #define MUELU_SHIFTEDLAPLACIAN_SHORT -#endif //if defined(HAVE_MUELU_IFPACK2) and defined(HAVE_MUELU_TPETRA) +#endif // if defined(HAVE_MUELU_IFPACK2) and defined(HAVE_MUELU_TPETRA) -#endif // MUELU_SHIFTEDLAPLACIAN_DECL_HPP +#endif // MUELU_SHIFTEDLAPLACIAN_DECL_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp index 231849305e7a..79751844e0d6 100644 --- a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp @@ -75,278 +75,255 @@ namespace MueLu { // Destructor -template -ShiftedLaplacian::~ShiftedLaplacian() {} +template +ShiftedLaplacian::~ShiftedLaplacian() {} // Input -template -void ShiftedLaplacian::setParameters(Teuchos::RCP< Teuchos::ParameterList > paramList) { - +template +void ShiftedLaplacian::setParameters(Teuchos::RCP paramList) { // Parameters - coarseGridSize_ = paramList->get("MueLu: coarse size", 1000); - numLevels_ = paramList->get("MueLu: levels", 3); - int stype = paramList->get("MueLu: smoother", 8); - if(stype==1) { Smoother_="jacobi"; } - else 
if(stype==2) { Smoother_="gauss-seidel"; } - else if(stype==3) { Smoother_="symmetric gauss-seidel"; } - else if(stype==4) { Smoother_="chebyshev"; } - else if(stype==5) { Smoother_="krylov"; } - else if(stype==6) { Smoother_="ilut"; } - else if(stype==7) { Smoother_="riluk"; } - else if(stype==8) { Smoother_="schwarz"; } - else if(stype==9) { Smoother_="superilu"; } - else if(stype==10) { Smoother_="superlu"; } - else { Smoother_="schwarz"; } - smoother_sweeps_ = paramList->get("MueLu: sweeps", 5); - smoother_damping_ = paramList->get("MueLu: relax val", 1.0); - ncycles_ = paramList->get("MueLu: cycles", 1); - iters_ = paramList->get("MueLu: iterations", 500); - solverType_ = paramList->get("MueLu: solver type", 1); + coarseGridSize_ = paramList->get("MueLu: coarse size", 1000); + numLevels_ = paramList->get("MueLu: levels", 3); + int stype = paramList->get("MueLu: smoother", 8); + if (stype == 1) { + Smoother_ = "jacobi"; + } else if (stype == 2) { + Smoother_ = "gauss-seidel"; + } else if (stype == 3) { + Smoother_ = "symmetric gauss-seidel"; + } else if (stype == 4) { + Smoother_ = "chebyshev"; + } else if (stype == 5) { + Smoother_ = "krylov"; + } else if (stype == 6) { + Smoother_ = "ilut"; + } else if (stype == 7) { + Smoother_ = "riluk"; + } else if (stype == 8) { + Smoother_ = "schwarz"; + } else if (stype == 9) { + Smoother_ = "superilu"; + } else if (stype == 10) { + Smoother_ = "superlu"; + } else { + Smoother_ = "schwarz"; + } + smoother_sweeps_ = paramList->get("MueLu: sweeps", 5); + smoother_damping_ = paramList->get("MueLu: relax val", 1.0); + ncycles_ = paramList->get("MueLu: cycles", 1); + iters_ = paramList->get("MueLu: iterations", 500); + solverType_ = paramList->get("MueLu: solver type", 1); restart_size_ = paramList->get("MueLu: restart size", 100); - recycle_size_ = paramList->get("MueLu: recycle size", 25); - isSymmetric_ = paramList->get("MueLu: symmetric", true); - ilu_leveloffill_ = paramList->get("MueLu: level-of-fill", 5); - 
ilu_abs_thresh_ = paramList->get("MueLu: abs thresh", 0.0); - ilu_rel_thresh_ = paramList->get("MueLu: rel thresh", 1.0); - ilu_diagpivotthresh_ = paramList->get("MueLu: piv thresh", 0.1); - ilu_drop_tol_ = paramList->get("MueLu: drop tol", 0.01); - ilu_fill_tol_ = paramList->get("MueLu: fill tol", 0.01); - schwarz_overlap_ = paramList->get("MueLu: overlap", 0); + recycle_size_ = paramList->get("MueLu: recycle size", 25); + isSymmetric_ = paramList->get("MueLu: symmetric", true); + ilu_leveloffill_ = paramList->get("MueLu: level-of-fill", 5); + ilu_abs_thresh_ = paramList->get("MueLu: abs thresh", 0.0); + ilu_rel_thresh_ = paramList->get("MueLu: rel thresh", 1.0); + ilu_diagpivotthresh_ = paramList->get("MueLu: piv thresh", 0.1); + ilu_drop_tol_ = paramList->get("MueLu: drop tol", 0.01); + ilu_fill_tol_ = paramList->get("MueLu: fill tol", 0.01); + schwarz_overlap_ = paramList->get("MueLu: overlap", 0); schwarz_usereorder_ = paramList->get("MueLu: use reorder", true); - int combinemode = paramList->get("MueLu: combine mode", 1); - if(combinemode==0) { schwarz_combinemode_ = Tpetra::ZERO; } - else { schwarz_combinemode_ = Tpetra::ADD; } - tol_ = paramList->get("MueLu: tolerance", 0.001); - + int combinemode = paramList->get("MueLu: combine mode", 1); + if (combinemode == 0) { + schwarz_combinemode_ = Tpetra::ZERO; + } else { + schwarz_combinemode_ = Tpetra::ADD; + } + tol_ = paramList->get("MueLu: tolerance", 0.001); } -template -void ShiftedLaplacian::setProblemMatrix(RCP& A) { - - A_=A; - if(A_!=Teuchos::null) +template +void ShiftedLaplacian::setProblemMatrix(RCP& A) { + A_ = A; + if (A_ != Teuchos::null) TpetraA_ = Utilities::Op2NonConstTpetraCrs(A_); #ifdef HAVE_MUELU_TPETRA_INST_INT_INT - if(LinearProblem_!=Teuchos::null) - LinearProblem_ -> setOperator ( TpetraA_ ); + if (LinearProblem_ != Teuchos::null) + LinearProblem_->setOperator(TpetraA_); #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and 
GO=int enabled."); #endif - } -template -void ShiftedLaplacian::setProblemMatrix(RCP< Tpetra::CrsMatrix >& TpetraA) { - - TpetraA_=TpetraA; +template +void ShiftedLaplacian::setProblemMatrix(RCP >& TpetraA) { + TpetraA_ = TpetraA; #ifdef HAVE_MUELU_TPETRA_INST_INT_INT - if(LinearProblem_!=Teuchos::null) - LinearProblem_ -> setOperator ( TpetraA_ ); + if (LinearProblem_ != Teuchos::null) + LinearProblem_->setOperator(TpetraA_); #endif - } -template -void ShiftedLaplacian::setPreconditioningMatrix(RCP& P) { - - P_=P; - GridTransfersExist_=false; - +template +void ShiftedLaplacian::setPreconditioningMatrix(RCP& P) { + P_ = P; + GridTransfersExist_ = false; } -template -void ShiftedLaplacian::setPreconditioningMatrix(RCP< Tpetra::CrsMatrix >& TpetraP) { - - RCP< Xpetra::CrsMatrix > Atmp - = rcp( new Xpetra::TpetraCrsMatrix(TpetraP) ); - P_= rcp( new Xpetra::CrsMatrixWrap(Atmp) ); - GridTransfersExist_=false; - +template +void ShiftedLaplacian::setPreconditioningMatrix(RCP >& TpetraP) { + RCP > Atmp = rcp(new Xpetra::TpetraCrsMatrix(TpetraP)); + P_ = rcp(new Xpetra::CrsMatrixWrap(Atmp)); + GridTransfersExist_ = false; } -template -void ShiftedLaplacian::setstiff(RCP& K) { - - K_=K; - +template +void ShiftedLaplacian::setstiff(RCP& K) { + K_ = K; } -template -void ShiftedLaplacian::setstiff(RCP< Tpetra::CrsMatrix >& TpetraK) { - - RCP< Xpetra::CrsMatrix > Atmp - = rcp( new Xpetra::TpetraCrsMatrix(TpetraK) ); - K_= rcp( new Xpetra::CrsMatrixWrap(Atmp) ); - +template +void ShiftedLaplacian::setstiff(RCP >& TpetraK) { + RCP > Atmp = rcp(new Xpetra::TpetraCrsMatrix(TpetraK)); + K_ = rcp(new Xpetra::CrsMatrixWrap(Atmp)); } -template -void ShiftedLaplacian::setmass(RCP& M) { - - M_=M; - +template +void ShiftedLaplacian::setmass(RCP& M) { + M_ = M; } -template -void ShiftedLaplacian::setmass(RCP< Tpetra::CrsMatrix >& TpetraM) { - - RCP< Xpetra::CrsMatrix > Atmp - = rcp( new Xpetra::TpetraCrsMatrix(TpetraM) ); - M_= rcp( new Xpetra::CrsMatrixWrap(Atmp) ); - +template +void 
ShiftedLaplacian::setmass(RCP >& TpetraM) { + RCP > Atmp = rcp(new Xpetra::TpetraCrsMatrix(TpetraM)); + M_ = rcp(new Xpetra::CrsMatrixWrap(Atmp)); } -template -void ShiftedLaplacian::setcoords(RCP& Coords) { - - Coords_=Coords; - +template +void ShiftedLaplacian::setcoords(RCP& Coords) { + Coords_ = Coords; } -template -void ShiftedLaplacian::setNullSpace(RCP NullSpace) { - - NullSpace_=NullSpace; - +template +void ShiftedLaplacian::setNullSpace(RCP NullSpace) { + NullSpace_ = NullSpace; } -template -void ShiftedLaplacian::setLevelShifts(std::vector levelshifts) { - - levelshifts_=levelshifts; - numLevels_=levelshifts_.size(); - +template +void ShiftedLaplacian::setLevelShifts(std::vector levelshifts) { + levelshifts_ = levelshifts; + numLevels_ = levelshifts_.size(); } // initialize -template -void ShiftedLaplacian::initialize() { - - TentPfact_ = rcp( new TentativePFactory ); - Pfact_ = rcp( new SaPFactory ); - PgPfact_ = rcp( new PgPFactory ); - TransPfact_ = rcp( new TransPFactory ); - Rfact_ = rcp( new GenericRFactory ); - Acfact_ = rcp( new RAPFactory ); - Acshift_ = rcp( new RAPShiftFactory ); - Amalgfact_ = rcp( new AmalgamationFactory ); - Dropfact_ = rcp( new CoalesceDropFactory ); - UCaggfact_ = rcp( new UncoupledAggregationFactory ); - CoarseMapfact_ = rcp( new CoarseMapFactory ); - Manager_ = rcp( new FactoryManager ); - Manager_ -> SetFactory("UnAmalgamationInfo", Amalgfact_); +template +void ShiftedLaplacian::initialize() { + TentPfact_ = rcp(new TentativePFactory); + Pfact_ = rcp(new SaPFactory); + PgPfact_ = rcp(new PgPFactory); + TransPfact_ = rcp(new TransPFactory); + Rfact_ = rcp(new GenericRFactory); + Acfact_ = rcp(new RAPFactory); + Acshift_ = rcp(new RAPShiftFactory); + Amalgfact_ = rcp(new AmalgamationFactory); + Dropfact_ = rcp(new CoalesceDropFactory); + UCaggfact_ = rcp(new UncoupledAggregationFactory); + CoarseMapfact_ = rcp(new CoarseMapFactory); + Manager_ = rcp(new FactoryManager); + Manager_->SetFactory("UnAmalgamationInfo", 
Amalgfact_); Teuchos::ParameterList params; - params.set("lightweight wrap",true); - params.set("aggregation: drop scheme","classical"); - Dropfact_ -> SetParameterList(params); - Manager_ -> SetFactory("Graph", Dropfact_); - Manager_ -> SetFactory("Aggregates", UCaggfact_ ); - Manager_ -> SetFactory("CoarseMap", CoarseMapfact_); - Manager_ -> SetFactory("Ptent", TentPfact_); - if(isSymmetric_==true) { - Manager_ -> SetFactory("P", Pfact_); - Manager_ -> SetFactory("R", TransPfact_); - } - else { - Manager_ -> SetFactory("P", PgPfact_); - Manager_ -> SetFactory("R", Rfact_); + params.set("lightweight wrap", true); + params.set("aggregation: drop scheme", "classical"); + Dropfact_->SetParameterList(params); + Manager_->SetFactory("Graph", Dropfact_); + Manager_->SetFactory("Aggregates", UCaggfact_); + Manager_->SetFactory("CoarseMap", CoarseMapfact_); + Manager_->SetFactory("Ptent", TentPfact_); + if (isSymmetric_ == true) { + Manager_->SetFactory("P", Pfact_); + Manager_->SetFactory("R", TransPfact_); + } else { + Manager_->SetFactory("P", PgPfact_); + Manager_->SetFactory("R", Rfact_); solverType_ = 10; } // choose smoother - if(Smoother_=="jacobi") { + if (Smoother_ == "jacobi") { precType_ = "RELAXATION"; precList_.set("relaxation: type", "Jacobi"); precList_.set("relaxation: sweeps", smoother_sweeps_); precList_.set("relaxation: damping factor", smoother_damping_); - } - else if(Smoother_=="gauss-seidel") { + } else if (Smoother_ == "gauss-seidel") { precType_ = "RELAXATION"; precList_.set("relaxation: type", "Gauss-Seidel"); precList_.set("relaxation: sweeps", smoother_sweeps_); precList_.set("relaxation: damping factor", smoother_damping_); - } - else if(Smoother_=="symmetric gauss-seidel") { + } else if (Smoother_ == "symmetric gauss-seidel") { precType_ = "RELAXATION"; precList_.set("relaxation: type", "Symmetric Gauss-Seidel"); precList_.set("relaxation: sweeps", smoother_sweeps_); precList_.set("relaxation: damping factor", smoother_damping_); - } - else 
if(Smoother_=="chebyshev") { + } else if (Smoother_ == "chebyshev") { precType_ = "CHEBYSHEV"; - } - else if(Smoother_=="krylov") { + } else if (Smoother_ == "krylov") { precType_ = "KRYLOV"; precList_.set("krylov: iteration type", krylov_type_); precList_.set("krylov: number of iterations", krylov_iterations_); - precList_.set("krylov: residual tolerance",1.0e-8); - precList_.set("krylov: block size",1); + precList_.set("krylov: residual tolerance", 1.0e-8); + precList_.set("krylov: block size", 1); precList_.set("krylov: preconditioner type", krylov_preconditioner_); - precList_.set("relaxation: sweeps",1); - solverType_=10; - } - else if(Smoother_=="ilut") { + precList_.set("relaxation: sweeps", 1); + solverType_ = 10; + } else if (Smoother_ == "ilut") { precType_ = "ILUT"; precList_.set("fact: ilut level-of-fill", ilu_leveloffill_); precList_.set("fact: absolute threshold", ilu_abs_thresh_); precList_.set("fact: relative threshold", ilu_rel_thresh_); - precList_.set("fact: drop tolerance", ilu_drop_tol_); - precList_.set("fact: relax value", ilu_relax_val_); - } - else if(Smoother_=="riluk") { + precList_.set("fact: drop tolerance", ilu_drop_tol_); + precList_.set("fact: relax value", ilu_relax_val_); + } else if (Smoother_ == "riluk") { precType_ = "RILUK"; precList_.set("fact: iluk level-of-fill", ilu_leveloffill_); precList_.set("fact: absolute threshold", ilu_abs_thresh_); precList_.set("fact: relative threshold", ilu_rel_thresh_); - precList_.set("fact: drop tolerance", ilu_drop_tol_); - precList_.set("fact: relax value", ilu_relax_val_); - } - else if(Smoother_=="schwarz") { + precList_.set("fact: drop tolerance", ilu_drop_tol_); + precList_.set("fact: relax value", ilu_relax_val_); + } else if (Smoother_ == "schwarz") { precType_ = "SCHWARZ"; precList_.set("schwarz: overlap level", schwarz_overlap_); precList_.set("schwarz: combine mode", schwarz_combinemode_); precList_.set("schwarz: use reordering", schwarz_usereorder_); // precList_.set("schwarz: 
filter singletons", true); // Disabled due to issues w/ Ifpack2/Zoltan2 w.r.t. Issue #560 - CMS 8/26/16 - precList_.set("order_method",schwarz_ordermethod_); - precList_.sublist("schwarz: reordering list").set("order_method",schwarz_ordermethod_); + precList_.set("order_method", schwarz_ordermethod_); + precList_.sublist("schwarz: reordering list").set("order_method", schwarz_ordermethod_); precList_.sublist("schwarz: subdomain solver parameters").set("fact: ilut level-of-fill", ilu_leveloffill_); precList_.sublist("schwarz: subdomain solver parameters").set("fact: absolute threshold", ilu_abs_thresh_); precList_.sublist("schwarz: subdomain solver parameters").set("fact: relative threshold", ilu_rel_thresh_); - precList_.sublist("schwarz: subdomain solver parameters").set("fact: drop tolerance", ilu_drop_tol_); - precList_.sublist("schwarz: subdomain solver parameters").set("fact: relax value", ilu_relax_val_); - } - else if(Smoother_=="superilu") { + precList_.sublist("schwarz: subdomain solver parameters").set("fact: drop tolerance", ilu_drop_tol_); + precList_.sublist("schwarz: subdomain solver parameters").set("fact: relax value", ilu_relax_val_); + } else if (Smoother_ == "superilu") { precType_ = "superlu"; precList_.set("RowPerm", ilu_rowperm_); precList_.set("ColPerm", ilu_colperm_); precList_.set("DiagPivotThresh", ilu_diagpivotthresh_); - precList_.set("ILU_DropRule",ilu_drop_rule_); - precList_.set("ILU_DropTol",ilu_drop_tol_); - precList_.set("ILU_FillFactor",ilu_leveloffill_); - precList_.set("ILU_Norm",ilu_normtype_); - precList_.set("ILU_MILU",ilu_milutype_); - precList_.set("ILU_FillTol",ilu_fill_tol_); - precList_.set("ILU_Flag",true); - } - else if(Smoother_=="superlu") { + precList_.set("ILU_DropRule", ilu_drop_rule_); + precList_.set("ILU_DropTol", ilu_drop_tol_); + precList_.set("ILU_FillFactor", ilu_leveloffill_); + precList_.set("ILU_Norm", ilu_normtype_); + precList_.set("ILU_MILU", ilu_milutype_); + precList_.set("ILU_FillTol", 
ilu_fill_tol_); + precList_.set("ILU_Flag", true); + } else if (Smoother_ == "superlu") { precType_ = "superlu"; precList_.set("ColPerm", ilu_colperm_); precList_.set("DiagPivotThresh", ilu_diagpivotthresh_); } #ifdef HAVE_MUELU_TPETRA_INST_INT_INT // construct smoother - smooProto_ = rcp( new Ifpack2Smoother(precType_,precList_) ); - smooFact_ = rcp( new SmootherFactory(smooProto_) ); + smooProto_ = rcp(new Ifpack2Smoother(precType_, precList_)); + smooFact_ = rcp(new SmootherFactory(smooProto_)); #if defined(HAVE_MUELU_AMESOS2) and defined(HAVE_AMESOS2_SUPERLU) - coarsestSmooProto_ = rcp( new DirectSolver("Superlu",coarsestSmooList_) ); + coarsestSmooProto_ = rcp(new DirectSolver("Superlu", coarsestSmooList_)); #elif defined(HAVE_MUELU_AMESOS2) and defined(HAVE_AMESOS2_KLU2) - coarsestSmooProto_ = rcp( new DirectSolver("Klu",coarsestSmooList_) ); + coarsestSmooProto_ = rcp(new DirectSolver("Klu", coarsestSmooList_)); #elif defined(HAVE_MUELU_AMESOS2) and defined(HAVE_AMESOS2_SUPERLUDIST) - coarsestSmooProto_ = rcp( new DirectSolver("Superludist",coarsestSmooList_) ); + coarsestSmooProto_ = rcp(new DirectSolver("Superludist", coarsestSmooList_)); #else - coarsestSmooProto_ = rcp( new Ifpack2Smoother(precType_,precList_) ); + coarsestSmooProto_ = rcp(new Ifpack2Smoother(precType_, precList_)); #endif - coarsestSmooFact_ = rcp( new SmootherFactory(coarsestSmooProto_, Teuchos::null) ); + coarsestSmooFact_ = rcp(new SmootherFactory(coarsestSmooProto_, Teuchos::null)); // For setupSlowRAP and setupFastRAP, the prolongation/restriction matrices // are constructed with the stiffness matrix. These matrices are kept for future @@ -354,150 +331,143 @@ void ShiftedLaplacian::initialize() { // useful for multiple frequency problems - when the frequency/preconditioner // changes, you only compute coarse grids (RAPs) and setup level smoothers when // you call Hierarchy->Setup(). 
- if(K_!=Teuchos::null) { - Manager_ -> SetFactory("Smoother", Teuchos::null); - Manager_ -> SetFactory("CoarseSolver", Teuchos::null); - Hierarchy_ = rcp( new Hierarchy(K_) ); - if(NullSpace_!=Teuchos::null) - Hierarchy_ -> GetLevel(0) -> Set("Nullspace", NullSpace_); - if(isSymmetric_==true) { - Hierarchy_ -> Keep("P", Pfact_.get()); - Hierarchy_ -> Keep("R", TransPfact_.get()); - Hierarchy_ -> SetImplicitTranspose(true); - } - else { - Hierarchy_ -> Keep("P", PgPfact_.get()); - Hierarchy_ -> Keep("R", Rfact_.get()); + if (K_ != Teuchos::null) { + Manager_->SetFactory("Smoother", Teuchos::null); + Manager_->SetFactory("CoarseSolver", Teuchos::null); + Hierarchy_ = rcp(new Hierarchy(K_)); + if (NullSpace_ != Teuchos::null) + Hierarchy_->GetLevel(0)->Set("Nullspace", NullSpace_); + if (isSymmetric_ == true) { + Hierarchy_->Keep("P", Pfact_.get()); + Hierarchy_->Keep("R", TransPfact_.get()); + Hierarchy_->SetImplicitTranspose(true); + } else { + Hierarchy_->Keep("P", PgPfact_.get()); + Hierarchy_->Keep("R", Rfact_.get()); } - Hierarchy_ -> Keep("Ptent", TentPfact_.get()); - Hierarchy_ -> SetMaxCoarseSize( coarseGridSize_ ); - Hierarchy_ -> Setup(*Manager_, 0, numLevels_); - GridTransfersExist_=true; + Hierarchy_->Keep("Ptent", TentPfact_.get()); + Hierarchy_->SetMaxCoarseSize(coarseGridSize_); + Hierarchy_->Setup(*Manager_, 0, numLevels_); + GridTransfersExist_ = true; } // Use preconditioning matrix to setup prolongation/restriction operators else { - Manager_ -> SetFactory("Smoother", smooFact_); - Manager_ -> SetFactory("CoarseSolver", coarsestSmooFact_); - Hierarchy_ = rcp( new Hierarchy(P_) ); - if(NullSpace_!=Teuchos::null) - Hierarchy_ -> GetLevel(0) -> Set("Nullspace", NullSpace_); - if(isSymmetric_==true) - Hierarchy_ -> SetImplicitTranspose(true); - Hierarchy_ -> SetMaxCoarseSize( coarseGridSize_ ); - Hierarchy_ -> Setup(*Manager_, 0, numLevels_); - GridTransfersExist_=true; + Manager_->SetFactory("Smoother", smooFact_); + 
Manager_->SetFactory("CoarseSolver", coarsestSmooFact_); + Hierarchy_ = rcp(new Hierarchy(P_)); + if (NullSpace_ != Teuchos::null) + Hierarchy_->GetLevel(0)->Set("Nullspace", NullSpace_); + if (isSymmetric_ == true) + Hierarchy_->SetImplicitTranspose(true); + Hierarchy_->SetMaxCoarseSize(coarseGridSize_); + Hierarchy_->Setup(*Manager_, 0, numLevels_); + GridTransfersExist_ = true; } // Belos Linear Problem and Solver Manager - BelosList_ = rcp( new Teuchos::ParameterList("GMRES") ); - BelosList_ -> set("Maximum Iterations",iters_ ); - BelosList_ -> set("Convergence Tolerance",tol_ ); - BelosList_ -> set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - BelosList_ -> set("Output Frequency",1); - BelosList_ -> set("Output Style",Belos::Brief); - BelosList_ -> set("Num Blocks",restart_size_); - BelosList_ -> set("Num Recycled Blocks",recycle_size_); + BelosList_ = rcp(new Teuchos::ParameterList("GMRES")); + BelosList_->set("Maximum Iterations", iters_); + BelosList_->set("Convergence Tolerance", tol_); + BelosList_->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + BelosList_->set("Output Frequency", 1); + BelosList_->set("Output Style", Belos::Brief); + BelosList_->set("Num Blocks", restart_size_); + BelosList_->set("Num Recycled Blocks", recycle_size_); #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and GO=int enabled."); #endif } // setup coarse grids for new frequency -template -void ShiftedLaplacian::setupFastRAP() { - - int numLevels = Hierarchy_ -> GetNumLevels(); - Manager_ -> SetFactory("Smoother", smooFact_); - Manager_ -> SetFactory("CoarseSolver", coarsestSmooFact_); - Hierarchy_ -> GetLevel(0) -> Set("A", P_); - Hierarchy_ -> Setup(*Manager_, 0, numLevels); +template +void ShiftedLaplacian::setupFastRAP() { + int numLevels = Hierarchy_->GetNumLevels(); + Manager_->SetFactory("Smoother", smooFact_); + Manager_->SetFactory("CoarseSolver", 
coarsestSmooFact_); + Hierarchy_->GetLevel(0)->Set("A", P_); + Hierarchy_->Setup(*Manager_, 0, numLevels); setupSolver(); - } // setup coarse grids for new frequency -template -void ShiftedLaplacian::setupSlowRAP() { - - int numLevels = Hierarchy_ -> GetNumLevels(); +template +void ShiftedLaplacian::setupSlowRAP() { + int numLevels = Hierarchy_->GetNumLevels(); Acshift_->SetShifts(levelshifts_); - Manager_ -> SetFactory("Smoother", smooFact_); - Manager_ -> SetFactory("CoarseSolver", coarsestSmooFact_); - Manager_ -> SetFactory("A", Acshift_); - Manager_ -> SetFactory("K", Acshift_); - Manager_ -> SetFactory("M", Acshift_); - Hierarchy_ -> GetLevel(0) -> Set("A", P_); - Hierarchy_ -> GetLevel(0) -> Set("K", K_); - Hierarchy_ -> GetLevel(0) -> Set("M", M_); - Hierarchy_ -> Setup(*Manager_, 0, numLevels); + Manager_->SetFactory("Smoother", smooFact_); + Manager_->SetFactory("CoarseSolver", coarsestSmooFact_); + Manager_->SetFactory("A", Acshift_); + Manager_->SetFactory("K", Acshift_); + Manager_->SetFactory("M", Acshift_); + Hierarchy_->GetLevel(0)->Set("A", P_); + Hierarchy_->GetLevel(0)->Set("K", K_); + Hierarchy_->GetLevel(0)->Set("M", M_); + Hierarchy_->Setup(*Manager_, 0, numLevels); setupSolver(); - } // setup coarse grids for new frequency -template -void ShiftedLaplacian::setupNormalRAP() { - +template +void ShiftedLaplacian::setupNormalRAP() { // Only setup hierarchy again if preconditioning matrix has changed - if( GridTransfersExist_ == false ) { - Hierarchy_ = rcp( new Hierarchy(P_) ); - if(NullSpace_!=Teuchos::null) - Hierarchy_ -> GetLevel(0) -> Set("Nullspace", NullSpace_); - if(isSymmetric_==true) - Hierarchy_ -> SetImplicitTranspose(true); - Hierarchy_ -> SetMaxCoarseSize( coarseGridSize_ ); - Hierarchy_ -> Setup(*Manager_, 0, numLevels_); - GridTransfersExist_=true; + if (GridTransfersExist_ == false) { + Hierarchy_ = rcp(new Hierarchy(P_)); + if (NullSpace_ != Teuchos::null) + Hierarchy_->GetLevel(0)->Set("Nullspace", NullSpace_); + if 
(isSymmetric_ == true) + Hierarchy_->SetImplicitTranspose(true); + Hierarchy_->SetMaxCoarseSize(coarseGridSize_); + Hierarchy_->Setup(*Manager_, 0, numLevels_); + GridTransfersExist_ = true; } setupSolver(); - } -template -void ShiftedLaplacian::setupSolver() { - +template +void ShiftedLaplacian::setupSolver() { #ifdef HAVE_MUELU_TPETRA_INST_INT_INT // Define Preconditioner and Operator - MueLuOp_ = rcp( new MueLu::ShiftedLaplacianOperator - (Hierarchy_, A_, ncycles_, subiters_, option_, tol_) ); + MueLuOp_ = rcp(new MueLu::ShiftedLaplacianOperator(Hierarchy_, A_, ncycles_, subiters_, option_, tol_)); // Belos Linear Problem - if(LinearProblem_==Teuchos::null) - LinearProblem_ = rcp( new LinearProblem ); - LinearProblem_ -> setOperator ( TpetraA_ ); - LinearProblem_ -> setRightPrec( MueLuOp_ ); - if(SolverManager_==Teuchos::null) { + if (LinearProblem_ == Teuchos::null) + LinearProblem_ = rcp(new LinearProblem); + LinearProblem_->setOperator(TpetraA_); + LinearProblem_->setRightPrec(MueLuOp_); + if (SolverManager_ == Teuchos::null) { std::string solverName; - SolverFactory_= rcp( new SolverFactory() ); - if(solverType_==1) { solverName="Block GMRES"; } - else if(solverType_==2) { solverName="Recycling GMRES"; } - else { solverName="Flexible GMRES"; } - SolverManager_ = SolverFactory_->create( solverName, BelosList_ ); - SolverManager_ -> setProblem( LinearProblem_ ); + SolverFactory_ = rcp(new SolverFactory()); + if (solverType_ == 1) { + solverName = "Block GMRES"; + } else if (solverType_ == 2) { + solverName = "Recycling GMRES"; + } else { + solverName = "Flexible GMRES"; + } + SolverManager_ = SolverFactory_->create(solverName, BelosList_); + SolverManager_->setProblem(LinearProblem_); } #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and GO=int enabled."); #endif } -template -void ShiftedLaplacian::resetLinearProblem() -{ +template +void ShiftedLaplacian::resetLinearProblem() { #ifdef 
HAVE_MUELU_TPETRA_INST_INT_INT - LinearProblem_ -> setOperator ( TpetraA_ ); + LinearProblem_->setOperator(TpetraA_); #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and GO=int enabled."); #endif } // Solve phase -template -int ShiftedLaplacian::solve(const RCP B, RCP& X) -{ +template +int ShiftedLaplacian::solve(const RCP B, RCP& X) { #ifdef HAVE_MUELU_TPETRA_INST_INT_INT // Set left and right hand sides for Belos - LinearProblem_ -> setProblem(X, B); + LinearProblem_->setProblem(X, B); // iterative solve - SolverManager_ -> solve(); + SolverManager_->solve(); #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and GO=int enabled."); #endif @@ -505,33 +475,28 @@ int ShiftedLaplacian::solve(const RCP -void ShiftedLaplacian::multigrid_apply(const RCP B, - RCP& X) -{ +template +void ShiftedLaplacian::multigrid_apply(const RCP B, + RCP& X) { // Set left and right hand sides for Belos - Hierarchy_ -> Iterate(*B, *X, 1, true, 0); + Hierarchy_->Iterate(*B, *X, 1, true, 0); } // Solve phase -template -void ShiftedLaplacian::multigrid_apply(const RCP > B, - RCP >& X) -{ - Teuchos::RCP< Xpetra::MultiVector > XpetraX - = Teuchos::rcp( new Xpetra::TpetraMultiVector(X) ); - Teuchos::RCP< Xpetra::MultiVector > XpetraB - = Teuchos::rcp( new Xpetra::TpetraMultiVector(B) ); +template +void ShiftedLaplacian::multigrid_apply(const RCP > B, + RCP >& X) { + Teuchos::RCP > XpetraX = Teuchos::rcp(new Xpetra::TpetraMultiVector(X)); + Teuchos::RCP > XpetraB = Teuchos::rcp(new Xpetra::TpetraMultiVector(B)); // Set left and right hand sides for Belos - Hierarchy_ -> Iterate(*XpetraB, *XpetraX, 1, true, 0); + Hierarchy_->Iterate(*XpetraB, *XpetraX, 1, true, 0); } // Get most recent iteration count -template -int ShiftedLaplacian::GetIterations() -{ +template +int ShiftedLaplacian::GetIterations() { #ifdef HAVE_MUELU_TPETRA_INST_INT_INT - int numiters = SolverManager_ -> 
getNumIters(); + int numiters = SolverManager_->getNumIters(); return numiters; #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and GO=int enabled."); @@ -540,13 +505,12 @@ int ShiftedLaplacian::GetIterations() } // Get most recent solver tolerance achieved -template +template typename Teuchos::ScalarTraits::magnitudeType -ShiftedLaplacian::GetResidual() -{ +ShiftedLaplacian::GetResidual() { typedef typename Teuchos::ScalarTraits::magnitudeType MT; #ifdef HAVE_MUELU_TPETRA_INST_INT_INT - MT residual = SolverManager_ -> achievedTol(); + MT residual = SolverManager_->achievedTol(); return residual; #else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "ShiftedLaplacian only available with Tpetra and GO=int enabled."); @@ -554,9 +518,9 @@ ShiftedLaplacian::GetResidual() #endif } -} +} // namespace MueLu #define MUELU_SHIFTEDLAPLACIAN_SHORT -#endif //if defined(HAVE_MUELU_IFPACK2) -#endif // MUELU_SHIFTEDLAPLACIAN_DEF_HPP +#endif // if defined(HAVE_MUELU_IFPACK2) +#endif // MUELU_SHIFTEDLAPLACIAN_DEF_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_TpetraOperatorAsRowMatrix.hpp b/packages/muelu/adapters/tpetra/MueLu_TpetraOperatorAsRowMatrix.hpp index 1852785c6a0c..19cc68d7abe1 100644 --- a/packages/muelu/adapters/tpetra/MueLu_TpetraOperatorAsRowMatrix.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_TpetraOperatorAsRowMatrix.hpp @@ -46,242 +46,236 @@ namespace MueLu { - template ::scalar_type, - class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, - class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, - class Node = typename Tpetra::Operator::node_type> - class TpetraOperatorAsRowMatrix : public Tpetra::RowMatrix { - - public: - using op_type = Tpetra::Operator; - using vec_type = Tpetra::Vector; - - //! 
The RowMatrix representing the base class of CrsMatrix - using row_matrix_type = Tpetra::RowMatrix; - - using impl_scalar_type = typename row_matrix_type::impl_scalar_type; - using mag_type = typename Kokkos::ArithTraits::mag_type; - - using local_inds_device_view_type = - typename row_matrix_type::local_inds_device_view_type; - using local_inds_host_view_type = - typename row_matrix_type::local_inds_host_view_type; - using nonconst_local_inds_host_view_type = - typename row_matrix_type::nonconst_local_inds_host_view_type; - - using global_inds_device_view_type = - typename row_matrix_type::global_inds_device_view_type; - using global_inds_host_view_type = - typename row_matrix_type::global_inds_host_view_type; - using nonconst_global_inds_host_view_type = - typename row_matrix_type::nonconst_global_inds_host_view_type; - - using values_device_view_type = - typename row_matrix_type::values_device_view_type; - using values_host_view_type = - typename row_matrix_type::values_host_view_type; - using nonconst_values_host_view_type = - typename row_matrix_type::nonconst_values_host_view_type; - - //! @name Constructor/Destructor - //@{ - - //! Constructor - TpetraOperatorAsRowMatrix(const RCP& op) - : - op_(op), - diag_(Teuchos::null) - { } - - TpetraOperatorAsRowMatrix(const RCP& op, - const RCP& diag) - : - op_(op), - diag_(diag) - { } - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP > getDomainMap() const { - return op_->getDomainMap(); - } - - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP > getRangeMap() const { - return op_->getRangeMap(); - } - - //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. - /*! - \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. - \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. 
- */ - void apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const { - op_->apply(X, Y, mode, alpha, beta); - } - - // Fake RowMatrix interface - Teuchos::RCP > getRowMap() const { - return op_->getRangeMap(); - } - - Teuchos::RCP > getColMap() const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - typename row_matrix_type::local_ordinal_type getBlockSize() const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - Teuchos::RCP > getComm() const { - return op_->getDomainMap()->getComm(); - } - - Teuchos::RCP > getGraph() const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - Tpetra::global_size_t getGlobalNumRows() const { - return getRowMap()->getGlobalNumElements(); - } - - Tpetra::global_size_t getGlobalNumCols() const { - return getDomainMap()->getGlobalNumElements(); - } - - size_t getLocalNumRows() const { - return getRowMap()->getLocalNumElements(); - } - - size_t getLocalNumCols() const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - GlobalOrdinal getIndexBase() const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - Tpetra::global_size_t getGlobalNumEntries() const { - return 0; - } - - size_t getLocalNumEntries() const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - size_t getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - size_t getNumEntriesInLocalRow (LocalOrdinal localRow) const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - size_t getGlobalMaxNumRowEntries () const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - size_t getLocalMaxNumRowEntries () const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - bool hasColMap () const { - 
return false; - } - - bool isLocallyIndexed() const { - return true; - } - - bool isGloballyIndexed() const { - return true; - } - - bool isFillComplete() const { - return true; - } - - bool supportsRowViews() const { - return false; - } - - void - getGlobalRowCopy (GlobalOrdinal GlobalRow, - nonconst_global_inds_host_view_type &Indices, - nonconst_values_host_view_type &Values, - size_t& NumEntries) const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - void - getLocalRowCopy (LocalOrdinal LocalRow, - nonconst_local_inds_host_view_type &Indices, - nonconst_values_host_view_type &Values, - size_t& NumEntries) const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - void - getGlobalRowView (GlobalOrdinal GlobalRow, - global_inds_host_view_type &indices, - values_host_view_type &values) const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - void - getLocalRowView (LocalOrdinal LocalRow, - local_inds_host_view_type & indices, - values_host_view_type & values) const { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - void getLocalDiagCopy (Tpetra::Vector &diag) const { - if (diag_.is_null()) - throw MueLu::Exceptions::RuntimeError("No diagonal available."); - else - diag = *diag_; - } - - void leftScale (const Tpetra::Vector& x) { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - void rightScale (const Tpetra::Vector& x) { - throw MueLu::Exceptions::RuntimeError("Not implemented."); - } - - mag_type getFrobeniusNorm() const { - return 0.; - } - - // void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { - // using std::setw; - // using std::endl; - // const size_t numRows = nearField_->getRowMap()->getGlobalNumElements(); - // const size_t nnzNearField = nearField_->getGlobalNumEntries(); - // const double nnzNearPerRow = Teuchos::as(nnzNearField)/numRows; - // const size_t nnzKernelApprox = 
kernelApproximations_->pointA_->getGlobalNumEntries(); - // const size_t numClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); - // const size_t nnzBasis = basisMatrix_->getGlobalNumEntries(); - // size_t nnzTransfer = 0; - // for (size_t i = 0; ipointA_->getGlobalNumEntries(); - // const size_t nnzTotal = nnzNearField+nnzKernelApprox+nnzBasis+nnzTransfer; - // const double nnzTotalPerRow = Teuchos::as(nnzTotal)/numRows; - // std::ostringstream oss; - // oss << std::left; - // oss << setw(9) << "rows" << setw(12) << "nnz(near)" << setw(14) << "nnz(near)/row" << setw(12) << "nnz(basis)" << setw(15) << "#cluster pairs" << setw(12)<< "nnz(kernel)" << setw(14) << "nnz(transfer)" << setw(12) << "nnz(total)" << setw(14) << "nnz(total)/row" << endl; - // oss << setw(9) << numRows << setw(12) << nnzNearField << setw(14) << nnzNearPerRow << setw(12) << nnzBasis << setw(15) << numClusterPairs << setw(12) << nnzKernelApprox << setw(14) << nnzTransfer << setw(12) << nnzTotal << setw(14) << nnzTotalPerRow << endl; - // out << oss.str(); - // } - - private: - - RCP op_; - RCP diag_; - }; - -} +template ::scalar_type, + class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, + class Node = typename Tpetra::Operator::node_type> +class TpetraOperatorAsRowMatrix : public Tpetra::RowMatrix { + public: + using op_type = Tpetra::Operator; + using vec_type = Tpetra::Vector; + + //! 
The RowMatrix representing the base class of CrsMatrix + using row_matrix_type = Tpetra::RowMatrix; + + using impl_scalar_type = typename row_matrix_type::impl_scalar_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + + using local_inds_device_view_type = + typename row_matrix_type::local_inds_device_view_type; + using local_inds_host_view_type = + typename row_matrix_type::local_inds_host_view_type; + using nonconst_local_inds_host_view_type = + typename row_matrix_type::nonconst_local_inds_host_view_type; + + using global_inds_device_view_type = + typename row_matrix_type::global_inds_device_view_type; + using global_inds_host_view_type = + typename row_matrix_type::global_inds_host_view_type; + using nonconst_global_inds_host_view_type = + typename row_matrix_type::nonconst_global_inds_host_view_type; + + using values_device_view_type = + typename row_matrix_type::values_device_view_type; + using values_host_view_type = + typename row_matrix_type::values_host_view_type; + using nonconst_values_host_view_type = + typename row_matrix_type::nonconst_values_host_view_type; + + //! @name Constructor/Destructor + //@{ + + //! Constructor + TpetraOperatorAsRowMatrix(const RCP& op) + : op_(op) + , diag_(Teuchos::null) {} + + TpetraOperatorAsRowMatrix(const RCP& op, + const RCP& diag) + : op_(op) + , diag_(diag) {} + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP > getDomainMap() const { + return op_->getDomainMap(); + } + + //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP > getRangeMap() const { + return op_->getRangeMap(); + } + + //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. + /*! + \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. + \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. 
+ */ + void apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const { + op_->apply(X, Y, mode, alpha, beta); + } + + // Fake RowMatrix interface + Teuchos::RCP > getRowMap() const { + return op_->getRangeMap(); + } + + Teuchos::RCP > getColMap() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + typename row_matrix_type::local_ordinal_type getBlockSize() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + Teuchos::RCP > getComm() const { + return op_->getDomainMap()->getComm(); + } + + Teuchos::RCP > getGraph() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + Tpetra::global_size_t getGlobalNumRows() const { + return getRowMap()->getGlobalNumElements(); + } + + Tpetra::global_size_t getGlobalNumCols() const { + return getDomainMap()->getGlobalNumElements(); + } + + size_t getLocalNumRows() const { + return getRowMap()->getLocalNumElements(); + } + + size_t getLocalNumCols() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + GlobalOrdinal getIndexBase() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + Tpetra::global_size_t getGlobalNumEntries() const { + return 0; + } + + size_t getLocalNumEntries() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + size_t getGlobalMaxNumRowEntries() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + size_t getLocalMaxNumRowEntries() const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + bool hasColMap() const { + return 
false; + } + + bool isLocallyIndexed() const { + return true; + } + + bool isGloballyIndexed() const { + return true; + } + + bool isFillComplete() const { + return true; + } + + bool supportsRowViews() const { + return false; + } + + void + getGlobalRowCopy(GlobalOrdinal GlobalRow, + nonconst_global_inds_host_view_type& Indices, + nonconst_values_host_view_type& Values, + size_t& NumEntries) const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + void + getLocalRowCopy(LocalOrdinal LocalRow, + nonconst_local_inds_host_view_type& Indices, + nonconst_values_host_view_type& Values, + size_t& NumEntries) const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + void + getGlobalRowView(GlobalOrdinal GlobalRow, + global_inds_host_view_type& indices, + values_host_view_type& values) const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + void + getLocalRowView(LocalOrdinal LocalRow, + local_inds_host_view_type& indices, + values_host_view_type& values) const { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + void getLocalDiagCopy(Tpetra::Vector& diag) const { + if (diag_.is_null()) + throw MueLu::Exceptions::RuntimeError("No diagonal available."); + else + diag = *diag_; + } + + void leftScale(const Tpetra::Vector& x) { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + void rightScale(const Tpetra::Vector& x) { + throw MueLu::Exceptions::RuntimeError("Not implemented."); + } + + mag_type getFrobeniusNorm() const { + return 0.; + } + + // void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { + // using std::setw; + // using std::endl; + // const size_t numRows = nearField_->getRowMap()->getGlobalNumElements(); + // const size_t nnzNearField = nearField_->getGlobalNumEntries(); + // const double nnzNearPerRow = Teuchos::as(nnzNearField)/numRows; + // const size_t nnzKernelApprox = kernelApproximations_->pointA_->getGlobalNumEntries(); + 
// const size_t numClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); + // const size_t nnzBasis = basisMatrix_->getGlobalNumEntries(); + // size_t nnzTransfer = 0; + // for (size_t i = 0; ipointA_->getGlobalNumEntries(); + // const size_t nnzTotal = nnzNearField+nnzKernelApprox+nnzBasis+nnzTransfer; + // const double nnzTotalPerRow = Teuchos::as(nnzTotal)/numRows; + // std::ostringstream oss; + // oss << std::left; + // oss << setw(9) << "rows" << setw(12) << "nnz(near)" << setw(14) << "nnz(near)/row" << setw(12) << "nnz(basis)" << setw(15) << "#cluster pairs" << setw(12)<< "nnz(kernel)" << setw(14) << "nnz(transfer)" << setw(12) << "nnz(total)" << setw(14) << "nnz(total)/row" << endl; + // oss << setw(9) << numRows << setw(12) << nnzNearField << setw(14) << nnzNearPerRow << setw(12) << nnzBasis << setw(15) << numClusterPairs << setw(12) << nnzKernelApprox << setw(14) << nnzTransfer << setw(12) << nnzTotal << setw(14) << nnzTotalPerRow << endl; + // out << oss.str(); + // } + + private: + RCP op_; + RCP diag_; +}; + +} // namespace MueLu diff --git a/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_decl.hpp b/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_decl.hpp index 4b5628140372..ecaa3be9ba9d 100644 --- a/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_decl.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_decl.hpp @@ -56,67 +56,68 @@ namespace MueLu { /*! @brief Wraps an existing MueLu::Hierarchy as a Tpetra::Operator. 
-*/ - template ::scalar_type, - class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, - class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, - class Node = typename Tpetra::Operator::node_type> - class TpetraOperator : public Tpetra::Operator { - protected: - TpetraOperator() = delete; - public: + */ +template ::scalar_type, + class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, + class Node = typename Tpetra::Operator::node_type> +class TpetraOperator : public Tpetra::Operator { + protected: + TpetraOperator() = delete; - //! @name Constructor/Destructor - //@{ + public: + //! @name Constructor/Destructor + //@{ - //! Constructor - TpetraOperator(const RCP >& Op) : Operator_(Op){ } + //! Constructor + TpetraOperator(const RCP >& Op) + : Operator_(Op) {} - //! Constructor - TpetraOperator(const RCP >& H) : Hierarchy_(H){ } + //! Constructor + TpetraOperator(const RCP >& H) + : Hierarchy_(H) {} - //! Destructor. - virtual ~TpetraOperator() { } + //! Destructor. + virtual ~TpetraOperator() {} - //@} + //@} - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP > getDomainMap() const; + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP > getDomainMap() const; - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP > getRangeMap() const; + //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP > getRangeMap() const; - //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. - /*! - \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. - \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. 
- */ - void apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::one()) const; + //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. + /*! + \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. + \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. + */ + void apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::one()) const; - //! Indicates whether this operator supports applying the adjoint operator. - bool hasTransposeApply() const; + //! Indicates whether this operator supports applying the adjoint operator. + bool hasTransposeApply() const; - //! @name MueLu specific - //@{ + //! @name MueLu specific + //@{ - //! Direct access to the underlying MueLu::Hierarchy. - RCP > GetHierarchy() const; + //! Direct access to the underlying MueLu::Hierarchy. + RCP > GetHierarchy() const; - //! Direct access to the underlying MueLu::Operator - RCP > GetOperator() const; + //! 
Direct access to the underlying MueLu::Operator + RCP > GetOperator() const; - //@} + //@} - private: - RCP > Hierarchy_; - RCP > Operator_; + private: + RCP > Hierarchy_; + RCP > Operator_; +}; - }; +} // namespace MueLu -} // namespace - -#endif // MUELU_TPETRAOPERATOR_DECL_HPP +#endif // MUELU_TPETRAOPERATOR_DECL_HPP diff --git a/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_def.hpp b/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_def.hpp index 0d2ae1581503..9f029b47d7af 100644 --- a/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_def.hpp +++ b/packages/muelu/adapters/tpetra/MueLu_TpetraOperator_def.hpp @@ -60,89 +60,90 @@ #include "MueLu_Hierarchy.hpp" #include "MueLu_Utilities.hpp" - namespace MueLu { -template -Teuchos::RCP > -TpetraOperator::getDomainMap() const { +template +Teuchos::RCP > +TpetraOperator::getDomainMap() const { typedef Xpetra::Matrix Matrix; typedef Xpetra::Map Map; typedef Xpetra::BlockedMap BlockedMap; RCP domainMap; - if(!Hierarchy_.is_null()) domainMap = Hierarchy_->GetLevel(0)->template Get >("A")->getDomainMap(); - else domainMap = Operator_->getDomainMap(); - + if (!Hierarchy_.is_null()) + domainMap = Hierarchy_->GetLevel(0)->template Get >("A")->getDomainMap(); + else + domainMap = Operator_->getDomainMap(); RCP bDomainMap = Teuchos::rcp_dynamic_cast(domainMap); - if(bDomainMap.is_null() == false) { + if (bDomainMap.is_null() == false) { return Xpetra::toTpetraNonZero(bDomainMap->getFullMap()); } return Xpetra::toTpetraNonZero(domainMap); } -template -Teuchos::RCP > TpetraOperator::getRangeMap() const { +template +Teuchos::RCP > TpetraOperator::getRangeMap() const { typedef Xpetra::Matrix Matrix; typedef Xpetra::Map Map; typedef Xpetra::BlockedMap BlockedMap; - RCP rangeMap; - if(!Hierarchy_.is_null()) rangeMap = Hierarchy_->GetLevel(0)->template Get >("A")->getRangeMap(); - else rangeMap = Operator_->getRangeMap(); + if (!Hierarchy_.is_null()) + rangeMap = Hierarchy_->GetLevel(0)->template Get >("A")->getRangeMap(); 
+ else + rangeMap = Operator_->getRangeMap(); RCP bRangeMap = Teuchos::rcp_dynamic_cast(rangeMap); - if(bRangeMap.is_null() == false) { + if (bRangeMap.is_null() == false) { return Xpetra::toTpetraNonZero(bRangeMap->getFullMap()); } return Xpetra::toTpetraNonZero(rangeMap); } -template -void TpetraOperator::apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, Scalar /* alpha */, Scalar /* beta */) const { - typedef Tpetra::MultiVector TMV; - typedef Xpetra::TpetraMultiVector XTMV; +template +void TpetraOperator::apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, Scalar /* alpha */, Scalar /* beta */) const { + typedef Tpetra::MultiVector TMV; + typedef Xpetra::TpetraMultiVector XTMV; - TEUCHOS_TEST_FOR_EXCEPTION(mode!=Teuchos::NO_TRANS,std::logic_error,"MueLu::TpetraOperator does not support applying the adjoint operator"); + TEUCHOS_TEST_FOR_EXCEPTION(mode != Teuchos::NO_TRANS, std::logic_error, "MueLu::TpetraOperator does not support applying the adjoint operator"); try { - TMV& temp_x = const_cast(X); + TMV& temp_x = const_cast(X); const XTMV tX(rcpFromRef(temp_x)); - XTMV tY(rcpFromRef(Y)); + XTMV tY(rcpFromRef(Y)); - if(!Hierarchy_.is_null()) + if (!Hierarchy_.is_null()) Hierarchy_->Iterate(tX, tY, 1, true); else Operator_->apply(tX, tY); } catch (std::exception& e) { std::cerr << "MueLu::TpetraOperator::apply : detected an exception" << std::endl - << e.what() << std::endl; + << e.what() << std::endl; throw; } } -template -bool TpetraOperator::hasTransposeApply() const { +template +bool TpetraOperator::hasTransposeApply() const { return false; } -template +template RCP > -TpetraOperator::GetHierarchy() const { +TpetraOperator::GetHierarchy() const { return Hierarchy_; } -template +template RCP > -TpetraOperator::GetOperator() const { +TpetraOperator::GetOperator() const { return Operator_; } -} // namespace +} // namespace MueLu -#endif //ifdef MUELU_TPETRAOPERATOR_DEF_HPP +#endif // ifdef 
MUELU_TPETRAOPERATOR_DEF_HPP diff --git a/packages/muelu/doc/MueLu_DoxygenDocumentation.hpp b/packages/muelu/doc/MueLu_DoxygenDocumentation.hpp index ca314b66e204..270b288d0c03 100644 --- a/packages/muelu/doc/MueLu_DoxygenDocumentation.hpp +++ b/packages/muelu/doc/MueLu_DoxygenDocumentation.hpp @@ -115,4 +115,4 @@ The MueLu User's Guide is located in muelu/doc/UsersGuide and at the /* ************************************************************************ */ /* ************************************************************************ */ -#endif //ifndef MUELU_DOXYGEN_DOCUMENTATION_HPP +#endif // ifndef MUELU_DOXYGEN_DOCUMENTATION_HPP diff --git a/packages/muelu/doc/Tutorial/src/Challenge.cpp b/packages/muelu/doc/Tutorial/src/Challenge.cpp index 0ae754397f61..a3db1e26519a 100644 --- a/packages/muelu/doc/Tutorial/src/Challenge.cpp +++ b/packages/muelu/doc/Tutorial/src/Challenge.cpp @@ -88,7 +88,6 @@ #if defined(HAVE_MUELU_EPETRA) #include - // prescribe types // run plain Epetra typedef double Scalar; @@ -105,20 +104,19 @@ typedef Xpetra::EpetraNode Node; * */ - -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { #if defined(HAVE_MUELU_EPETRA) #include "MueLu_UseShortNames.hpp" using Teuchos::RCP; using Teuchos::rcp; Teuchos::oblackholestream blackhole; - Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole); + Teuchos::GlobalMPISession mpiSession(&argc, &argv, &blackhole); bool success = false; try { RCP > comm = Teuchos::DefaultComm::getComm(); - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); out->setOutputToRootOnly(0); #ifndef HAVE_XPETRA_INT_LONG_LONG @@ -130,13 +128,14 @@ int main(int argc, char *argv[]) { // ========================================================================= Teuchos::CommandLineProcessor clp(false); - std::string xmlFileName = "xml/muelu_ParameterList.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file 
[default = 'xml/muelu_ParameterList.xml']"); + std::string xmlFileName = "xml/muelu_ParameterList.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'xml/muelu_ParameterList.xml']"); - int globalNumDofs = 0; //7020; + int globalNumDofs = 0; // 7020; clp.setOption("globalNumDofs", &globalNumDofs, "global number of degrees of freedom [has to be set by user, default = 0 -> error]"); int nDofsPerNode = 1; clp.setOption("nDofsPerNode", &nDofsPerNode, "number of degrees of freedom per node [has to be set by user, default = 1]"); - int nProcs = comm->getSize(); + int nProcs = comm->getSize(); std::string dsolveType = "cg"; clp.setOption("solver", &dsolveType, "solve type: (none | cg | gmres | standalone) [default = cg]"); double dtol = 1e-12; @@ -146,90 +145,88 @@ int main(int argc, char *argv[]) { std::string coordsFile = ""; clp.setOption("coordinates", &coordsFile, "file name containing coordinates in matrix market format"); - switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - if(globalNumDofs == 0) { + if (globalNumDofs == 0) { std::cout << "Please specify '--globalNumDofs'! 
Simulation cannot run without that parameter correctly set" << std::endl; return EXIT_FAILURE; } - int nLocalDofs = (int) globalNumDofs / nProcs; - nLocalDofs = nLocalDofs - (nLocalDofs % nDofsPerNode); + int nLocalDofs = (int)globalNumDofs / nProcs; + nLocalDofs = nLocalDofs - (nLocalDofs % nDofsPerNode); int nCumulatedDofs = 0; - MueLu_sumAll(comm,nLocalDofs, nCumulatedDofs); + MueLu_sumAll(comm, nLocalDofs, nCumulatedDofs); - if(comm->getRank() == nProcs-1) { + if (comm->getRank() == nProcs - 1) { nLocalDofs += globalNumDofs - nCumulatedDofs; } // read in problem - Epetra_Map emap (globalNumDofs, nLocalDofs, 0, *Xpetra::toEpetra(comm)); - Epetra_CrsMatrix * ptrA = 0; - Epetra_Vector * ptrf = 0; + Epetra_Map emap(globalNumDofs, nLocalDofs, 0, *Xpetra::toEpetra(comm)); + Epetra_CrsMatrix* ptrA = 0; + Epetra_Vector* ptrf = 0; Epetra_MultiVector* ptrNS = 0; std::cout << "Reading matrix market file" << std::endl; std::stringstream ssA, ssB, ssNS; - ssA << problemFile << "_A.txt"; - ssB << problemFile << "_b.txt"; + ssA << problemFile << "_A.txt"; + ssB << problemFile << "_b.txt"; ssNS << problemFile << "_ns.txt"; - std::string fileA = ssA.str(); - std::string fileB = ssB.str(); + std::string fileA = ssA.str(); + std::string fileB = ssB.str(); std::string fileNS = ssNS.str(); - EpetraExt::MatrixMarketFileToCrsMatrix(fileA.c_str(),emap,emap,emap,ptrA); - EpetraExt::MatrixMarketFileToVector(fileB.c_str(),emap,ptrf); + EpetraExt::MatrixMarketFileToCrsMatrix(fileA.c_str(), emap, emap, emap, ptrA); + EpetraExt::MatrixMarketFileToVector(fileB.c_str(), emap, ptrf); EpetraExt::MatrixMarketFileToMultiVector(fileNS.c_str(), emap, ptrNS); - RCP epA = Teuchos::rcp(ptrA); - RCP epB = Teuchos::rcp(ptrf); + RCP epA = Teuchos::rcp(ptrA); + RCP epB = Teuchos::rcp(ptrf); RCP epNS = Teuchos::rcp(ptrNS); // read in coordinates RCP xCoords = Teuchos::null; - if(coordsFile != "") { + if (coordsFile != "") { Epetra_MultiVector* ptrcoords = 0; - Epetra_Map coords_emap 
(globalNumDofs/nDofsPerNode, nLocalDofs/nDofsPerNode, 0, *Xpetra::toEpetra(comm)); + Epetra_Map coords_emap(globalNumDofs / nDofsPerNode, nLocalDofs / nDofsPerNode, 0, *Xpetra::toEpetra(comm)); EpetraExt::MatrixMarketFileToMultiVector(coordsFile.c_str(), coords_emap, ptrcoords); RCP epCoords = Teuchos::rcp(ptrcoords); - xCoords = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epCoords)); + xCoords = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epCoords)); } // Epetra_CrsMatrix -> Xpetra::Matrix - RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); + RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); RCP crsOp = Teuchos::rcp(new CrsMatrixWrap(exA)); - RCP Op = Teuchos::rcp_dynamic_cast(crsOp); + RCP Op = Teuchos::rcp_dynamic_cast(crsOp); Op->SetFixedBlockSize(nDofsPerNode); - RCP xNS = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epNS)); + RCP xNS = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epNS)); // Epetra_Map -> Xpetra::Map - const RCP< const Map> map = Xpetra::toXpetra(emap); + const RCP map = Xpetra::toXpetra(emap); - ParameterListInterpreter mueLuFactory(xmlFileName,*comm); - RCP H = mueLuFactory.CreateHierarchy(); + ParameterListInterpreter mueLuFactory(xmlFileName, *comm); + RCP H = mueLuFactory.CreateHierarchy(); RCP Finest = H->GetLevel(0); Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH); - Finest->Set("A",Op); - Finest->Set("Nullspace",xNS); - if(xCoords != Teuchos::null) Finest->Set("Coordinates",xCoords); + Finest->Set("A", Op); + Finest->Set("Nullspace", xNS); + if (xCoords != Teuchos::null) Finest->Set("Coordinates", xCoords); mueLuFactory.SetupHierarchy(*H); - #ifdef HAVE_MUELU_AZTECOO H->IsPreconditioner(true); - MueLu::EpetraOperator mueluPrec(H); // Wrap MueLu preconditioner into an Epetra Operator + MueLu::EpetraOperator mueluPrec(H); // Wrap MueLu preconditioner into an Epetra Operator // create a solution vector RCP epX = rcp(new Epetra_Vector(epA->RowMap())); - epX->PutScalar((Scalar) 0.0); + epX->PutScalar((Scalar)0.0); 
Epetra_LinearProblem eProblem(epA.get(), epX.get(), epB.get()); @@ -240,28 +237,28 @@ int main(int argc, char *argv[]) { solver.SetAztecOption(AZ_solver, AZ_cg); else if (dsolveType == "gmres") solver.SetAztecOption(AZ_solver, AZ_gmres); - else { // use fix point method instead + else { // use fix point method instead solver.SetAztecOption(AZ_solver, AZ_fixed_pt); } solver.SetAztecOption(AZ_output, 1); solver.Iterate(500, dtol); - { //TODO: simplify this - RCP mueluX = rcp(new Xpetra::EpetraVectorT(epX)); - RCP mueluB = rcp(new Xpetra::EpetraVectorT(epB)); + { // TODO: simplify this + RCP mueluX = rcp(new Xpetra::EpetraVectorT(epX)); + RCP mueluB = rcp(new Xpetra::EpetraVectorT(epB)); // Print relative residual norm Teuchos::ScalarTraits::magnitudeType residualNorms = Utilities::ResidualNorm(*Op, *mueluX, *mueluB)[0]; if (comm->getRank() == 0) std::cout << "||Residual|| = " << residualNorms << std::endl; } -#endif // HAVE_MUELU_AZTECOO +#endif // HAVE_MUELU_AZTECOO success = true; } TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); #else return EXIT_SUCCESS; #endif // #ifdef defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) diff --git a/packages/muelu/doc/Tutorial/src/MLParameterList.cpp b/packages/muelu/doc/Tutorial/src/MLParameterList.cpp index a4a713670df9..dc3e7469200d 100644 --- a/packages/muelu/doc/Tutorial/src/MLParameterList.cpp +++ b/packages/muelu/doc/Tutorial/src/MLParameterList.cpp @@ -47,7 +47,7 @@ #include -#include // getParametersFromXmlFile() +#include // getParametersFromXmlFile() #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) #include #include @@ -91,38 +91,42 @@ int main(int argc, char *argv[]) { // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // // Parameters // - //TODO: FIXME: option by default does not work for MueLu/Tpetra + // TODO: FIXME: option by default does not work for MueLu/Tpetra int nIts = 9; - Teuchos::CommandLineProcessor clp(false); // Note: + Teuchos::CommandLineProcessor clp(false); // Note: - Galeri::Xpetra::Parameters matrixParameters(clp, 256); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra + Galeri::Xpetra::Parameters matrixParameters(clp, 256); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - std::string xmlFileName; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default an hard-coded parameter list."); - int muelu = true; clp.setOption("muelu", &muelu, "use muelu"); //TODO: bool instead of int - int ml = true; + std::string xmlFileName; + clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default an hard-coded parameter list."); + int muelu = true; + clp.setOption("muelu", &muelu, "use muelu"); // TODO: bool instead of int + int ml = true; #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - clp.setOption("ml", &ml, "use ml"); + clp.setOption("ml", &ml, "use ml"); #endif - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; - case Teuchos::CommandLineProcessor::PARSE_ERROR: - case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + case Teuchos::CommandLineProcessor::PARSE_ERROR: + case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // TODO: check -ml and --linAlgebra - if (comm->getRank() == 0) { std::cout << xpetraParameters << matrixParameters; } + if (comm->getRank() == 0) { + std::cout << xpetraParameters << matrixParameters; + } if (ml && xpetraParameters.GetLib() == Xpetra::UseTpetra) { ml = false; std::cout << "ML preconditionner can only be built if --linAlgebra=Epetra. 
Option --ml ignored" << std::endl; @@ -134,9 +138,9 @@ int main(int argc, char *argv[]) { // TUTORIALSPLIT =========================================================== RCP map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm); - RCP > Pr = + RCP > Pr = Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); - RCP A = Pr->BuildMatrix(); + RCP A = Pr->BuildMatrix(); // TUTORIALSPLIT =========================================================== // @@ -146,35 +150,31 @@ int main(int argc, char *argv[]) { // ML parameter list RCP params; if (xmlFileName != "") { - std::cout << "Reading " << xmlFileName << " ..." << std::endl; // TUTORIALSPLIT =========================================================== params = Teuchos::getParametersFromXmlFile(xmlFileName); // TUTORIALSPLIT =========================================================== } else { - std::cout << "Using hard-coded parameter list:" << std::endl; // TUTORIALSPLIT =========================================================== params = rcp(new Teuchos::ParameterList()); - params->set("ML output", 10); + params->set("ML output", 10); params->set("max levels", 2); params->set("smoother: type", "symmetric Gauss-Seidel"); if (xpetraParameters.GetLib() == Xpetra::UseTpetra) - params->set("coarse: type","Amesos-Superlu"); + params->set("coarse: type", "Amesos-Superlu"); else - params->set("coarse: type","Amesos-KLU"); + params->set("coarse: type", "Amesos-KLU"); // TUTORIALSPLIT =========================================================== - } std::cout << "Initial parameter list" << std::endl; std::cout << *params << std::endl; if (muelu) { - // // Construct a multigrid preconditioner // @@ -182,27 +182,27 @@ int main(int argc, char *argv[]) { // Multigrid Hierarchy // TUTORIALSPLIT =========================================================== std::string paramXML = MueLu::ML2MueLuParameterTranslator::translate(*params, ""); - params 
= Teuchos::getParametersFromXmlString(paramXML); + params = Teuchos::getParametersFromXmlString(paramXML); ParameterListInterpreter mueLuFactory(*params); RCP H = mueLuFactory.CreateHierarchy(); // TUTORIALSPLIT =========================================================== // build default null space LocalOrdinal numPDEs = 1; - if(A->IsView("stridedMaps")==true) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) - numPDEs = Teuchos::rcp_dynamic_cast(A->getRowMap())->getFixedBlockSize(); - oldView = A->SwitchToView(oldView); + if (A->IsView("stridedMaps") == true) { + Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) + numPDEs = Teuchos::rcp_dynamic_cast(A->getRowMap())->getFixedBlockSize(); + oldView = A->SwitchToView(oldView); } // TUTORIALSPLIT =========================================================== RCP nullspace = MultiVectorFactory::Build(A->getDomainMap(), numPDEs); - for (int i=0; i nsValues = nullspace->getDataNonConst(i); - int numBlocks = nsValues.size() / numPDEs; - for (int j=0; j< numBlocks; ++j) { - nsValues[j*numPDEs + i] = 1.0; + int numBlocks = nsValues.size() / numPDEs; + for (int j = 0; j < numBlocks; ++j) { + nsValues[j * numPDEs + i] = 1.0; } } // TUTORIALSPLIT =========================================================== @@ -224,8 +224,9 @@ int main(int argc, char *argv[]) { RCP X = VectorFactory::Build(map); RCP B = VectorFactory::Build(map); - X->putScalar((Scalar) 0.0); - B->setSeed(846930886); B->randomize(); + X->putScalar((Scalar)0.0); + B->setSeed(846930886); + B->randomize(); // AMG as a standalone solver H->IsPreconditioner(false); @@ -237,31 +238,32 @@ int main(int argc, char *argv[]) { std::cout << "||Residual|| = " << residualNorms << std::endl; #if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_AZTECOO) - if 
(xpetraParameters.GetLib() == Xpetra::UseEpetra) { //TODO: should be doable with Tpetra too + if (xpetraParameters.GetLib() == Xpetra::UseEpetra) { // TODO: should be doable with Tpetra too // AMG as a preconditioner - //TODO: name mueluPrec and mlPrec not + // TODO: name mueluPrec and mlPrec not H->IsPreconditioner(true); - MueLu::EpetraOperator mueluPrec(H); // Wrap MueLu preconditioner into an Epetra Operator + MueLu::EpetraOperator mueluPrec(H); // Wrap MueLu preconditioner into an Epetra Operator // // Solve Ax = b // - RCP eA; //duplicate code - { // TODO: simplify this - RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); - RCP xCrsMtx = xCrsOp->getCrsMatrix(); - RCP > eCrsMtx = Teuchos::rcp_dynamic_cast >(xCrsMtx, true); - eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); + RCP eA; // duplicate code + { // TODO: simplify this + RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); + RCP xCrsMtx = xCrsOp->getCrsMatrix(); + RCP > eCrsMtx = Teuchos::rcp_dynamic_cast >(xCrsMtx, true); + eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); } RCP eX = rcp(new Epetra_Vector(eA->RowMap())); RCP eB = rcp(new Epetra_Vector(eA->RowMap())); - eX->PutScalar((Scalar) 0.0); - eB->SetSeed(846930886); eB->Random(); + eX->PutScalar((Scalar)0.0); + eB->SetSeed(846930886); + eB->Random(); Epetra_LinearProblem eProblem(eA.get(), eX.get(), eB.get()); @@ -273,7 +275,7 @@ int main(int argc, char *argv[]) { solver.Iterate(nIts, 1e-10); - { //TODO: simplify this + { // TODO: simplify this RCP mueluX = rcp(new Xpetra::EpetraVector(eX)); RCP mueluB = rcp(new Xpetra::EpetraVector(eB)); // Print relative residual norm @@ -284,14 +286,20 @@ int main(int argc, char *argv[]) { // TODO: AMG as a preconditioner (AZ_cg) } -#endif // HAVE_MUELU_AZTECOO +#endif // HAVE_MUELU_AZTECOO - } // if (muelu) + } // if (muelu) #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) if (ml) { - - std::cout << std::endl << std::endl << std::endl << std::endl << "**** ML ml ML ml ML" << std::endl << std::endl << std::endl << 
std::endl; + std::cout << std::endl + << std::endl + << std::endl + << std::endl + << "**** ML ml ML ml ML" << std::endl + << std::endl + << std::endl + << std::endl; // // Construct a multigrid preconditioner @@ -299,18 +307,18 @@ int main(int argc, char *argv[]) { // Multigrid Hierarchy // TUTORIALSPLIT =========================================================== - RCP crsOp = Teuchos::rcp_dynamic_cast(A, true); - RCP crsMtx = crsOp->getCrsMatrix(); - RCP > epetraCrsMtx = Teuchos::rcp_dynamic_cast >(crsMtx, true); - RCP epetra_CrsMtx = epetraCrsMtx->getEpetra_CrsMatrix(); + RCP crsOp = Teuchos::rcp_dynamic_cast(A, true); + RCP crsMtx = crsOp->getCrsMatrix(); + RCP > epetraCrsMtx = Teuchos::rcp_dynamic_cast >(crsMtx, true); + RCP epetra_CrsMtx = epetraCrsMtx->getEpetra_CrsMatrix(); RCP eA; { // TUTORIALSPLIT =========================================================== - RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); - RCP xCrsMtx = xCrsOp->getCrsMatrix(); - RCP > eCrsMtx = Teuchos::rcp_dynamic_cast >(xCrsMtx, true); - eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); + RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); + RCP xCrsMtx = xCrsOp->getCrsMatrix(); + RCP > eCrsMtx = Teuchos::rcp_dynamic_cast >(xCrsMtx, true); + eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); // TUTORIALSPLIT =========================================================== } // TUTORIALSPLIT =========================================================== @@ -325,8 +333,9 @@ int main(int argc, char *argv[]) { RCP eX = rcp(new Epetra_Vector(eA->RowMap())); RCP eB = rcp(new Epetra_Vector(eA->RowMap())); - eX->PutScalar((Scalar) 0.0); - eB->SetSeed(846930886); eB->Random(); + eX->PutScalar((Scalar)0.0); + eB->SetSeed(846930886); + eB->Random(); Epetra_LinearProblem eProblem(eA.get(), eX.get(), eB.get()); @@ -338,7 +347,7 @@ int main(int argc, char *argv[]) { solver.Iterate(nIts, 1e-10); - { //TODO: simplify this + { // TODO: simplify this RCP mueluX = rcp(new Xpetra::EpetraVector(eX)); RCP mueluB = rcp(new 
Xpetra::EpetraVector(eB)); // Print relative residual norm @@ -350,16 +359,15 @@ int main(int argc, char *argv[]) { // TODO: AMG as a preconditioner (AZ_cg) #else std::cout << "Enable AztecOO to see solution" << std::endl; -#endif // HAVE_MUELU_AZTECOO +#endif // HAVE_MUELU_AZTECOO std::cout << "Parameter list after ML run" << std::endl; - const Teuchos::ParameterList & paramsAfterML = mlPrec->GetList(); + const Teuchos::ParameterList &paramsAfterML = mlPrec->GetList(); std::cout << paramsAfterML << std::endl; - } // if (ml) - + } // if (ml) -#endif // HAVE_MUELU_ML && HAVE_MUELU_EPETRA -#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) +#endif // HAVE_MUELU_ML && HAVE_MUELU_EPETRA +#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) return EXIT_SUCCESS; } diff --git a/packages/muelu/doc/Tutorial/src/ScalingTest.cpp b/packages/muelu/doc/Tutorial/src/ScalingTest.cpp index 742c0a026e65..e60398e8ddec 100644 --- a/packages/muelu/doc/Tutorial/src/ScalingTest.cpp +++ b/packages/muelu/doc/Tutorial/src/ScalingTest.cpp @@ -93,8 +93,8 @@ #include "BelosLinearProblem.hpp" #include "BelosBlockCGSolMgr.hpp" #include "BelosBlockGmresSolMgr.hpp" -#include "BelosXpetraAdapter.hpp" // this header defines Belos::XpetraOp() -#include "BelosMueLuAdapter.hpp" // this header defines Belos::MueLuOp() +#include "BelosXpetraAdapter.hpp" // this header defines Belos::XpetraOp() +#include "BelosMueLuAdapter.hpp" // this header defines Belos::MueLuOp() #endif #ifdef HAVE_MUELU_ISORROPIA @@ -103,10 +103,10 @@ // typedef double Scalar; -typedef int LocalOrdinal; -//FIXME we need a HAVE_MUELU_LONG_LONG_INT option +typedef int LocalOrdinal; +// FIXME we need a HAVE_MUELU_LONG_LONG_INT option // -// NOTE (mfh 11 Aug 2015) I just added a HAVE_XPETRA_INT_LONG_LONG option. +// NOTE (mfh 11 Aug 2015) I just added a HAVE_XPETRA_INT_LONG_LONG option.
#ifdef HAVE_XPETRA_INT_LONG_LONG typedef long long int GlobalOrdinal; @@ -120,9 +120,10 @@ typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; int main(int argc, char *argv[]) { #include "MueLu_UseShortNames.hpp" - using Teuchos::RCP; using Teuchos::rcp; + using Teuchos::RCP; + using Teuchos::rcp; using Teuchos::TimeMonitor; - //using Galeri::Xpetra::CreateCartesianCoordinates; + // using Galeri::Xpetra::CreateCartesianCoordinates; Teuchos::oblackholestream blackhole; Teuchos::GlobalMPISession mpiSession(&argc, &argv, &blackhole); @@ -134,16 +135,16 @@ int main(int argc, char *argv[]) { out->setOutputToRootOnly(0); *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl; - // out->setOutputToRootOnly(-1); - // out->precision(12); +// out->setOutputToRootOnly(-1); +// out->precision(12); - //FIXME we need a HAVE_MUELU_LONG_LONG_INT option - // - // NOTE (mfh 11 Aug 2015) I just added a HAVE_XPETRA_INT_LONG_LONG option. - // - #ifndef HAVE_XPETRA_INT_LONG_LONG +// FIXME we need a HAVE_MUELU_LONG_LONG_INT option +// +// NOTE (mfh 11 Aug 2015) I just added a HAVE_XPETRA_INT_LONG_LONG option. +// +#ifndef HAVE_XPETRA_INT_LONG_LONG *out << "Warning: scaling test was not compiled with long long int support" << std::endl; - #endif +#endif // // SET TEST PARAMETERS @@ -153,56 +154,74 @@ int main(int argc, char *argv[]) { // Default is Laplace1D with nx = 8748. // It's a nice size for 1D and perfect aggregation. (6561 = 3^8) - //Nice size for 1D and perfect aggregation on small numbers of processors. (8748 = 4*3^7) - Galeri::Xpetra::Parameters matrixParameters(clp, 8748); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra + // Nice size for 1D and perfect aggregation on small numbers of processors. 
(8748 = 4*3^7) + Galeri::Xpetra::Parameters matrixParameters(clp, 8748); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra // Custom command line parameters - int optDump = 0; clp.setOption("dump", &optDump, "write matrix to file"); - int optTimings = 0; clp.setOption("timings", &optTimings, "print timings to screen"); + int optDump = 0; + clp.setOption("dump", &optDump, "write matrix to file"); + int optTimings = 0; + clp.setOption("timings", &optTimings, "print timings to screen"); // - Levels - LO optMaxLevels = 10; clp.setOption("maxLevels", &optMaxLevels, "maximum number of levels allowed"); - int optMaxCoarseSize = 50; clp.setOption("maxCoarseSize", &optMaxCoarseSize, "maximum #dofs in coarse operator"); //FIXME clp doesn't like long long int + LO optMaxLevels = 10; + clp.setOption("maxLevels", &optMaxLevels, "maximum number of levels allowed"); + int optMaxCoarseSize = 50; + clp.setOption("maxCoarseSize", &optMaxCoarseSize, "maximum #dofs in coarse operator"); // FIXME clp doesn't like long long int // - Smoothed-Aggregation - Scalar optSaDamping = 4./3; clp.setOption("saDamping", &optSaDamping, "prolongator damping factor"); + Scalar optSaDamping = 4. 
/ 3; + clp.setOption("saDamping", &optSaDamping, "prolongator damping factor"); // - Aggregation - std::string optAggOrdering = "natural"; clp.setOption("aggOrdering", &optAggOrdering, "aggregation ordering strategy (natural, random, graph)"); - int optMinPerAgg = 2; clp.setOption("minPerAgg", &optMinPerAgg, "minimum #DOFs per aggregate"); - int optMaxNbrSel = 0; clp.setOption("maxNbrSel", &optMaxNbrSel, "maximum # of nbrs allowed to be in other aggregates"); + std::string optAggOrdering = "natural"; + clp.setOption("aggOrdering", &optAggOrdering, "aggregation ordering strategy (natural, random, graph)"); + int optMinPerAgg = 2; + clp.setOption("minPerAgg", &optMinPerAgg, "minimum #DOFs per aggregate"); + int optMaxNbrSel = 0; + clp.setOption("maxNbrSel", &optMaxNbrSel, "maximum # of nbrs allowed to be in other aggregates"); // - R - int optExplicitR = 1; clp.setOption("explicitR", &optExplicitR, "restriction will be explicitly stored as transpose of prolongator"); + int optExplicitR = 1; + clp.setOption("explicitR", &optExplicitR, "restriction will be explicitly stored as transpose of prolongator"); // - Smoothers - std::string optSmooType = "sgs"; clp.setOption("smooType", &optSmooType, "smoother type ('l1-sgs', 'sgs 'or 'cheby')"); - int optSweeps = 2; clp.setOption("sweeps", &optSweeps, "sweeps to be used in SGS (or Chebyshev degree)"); + std::string optSmooType = "sgs"; + clp.setOption("smooType", &optSmooType, "smoother type ('l1-sgs', 'sgs 'or 'cheby')"); + int optSweeps = 2; + clp.setOption("sweeps", &optSweeps, "sweeps to be used in SGS (or Chebyshev degree)"); // - Repartitioning #if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) - int optRepartition = 1; clp.setOption("repartition", &optRepartition, "enable repartitioning (0=no repartitioning, 1=Zoltan RCB, 2=Isorropia+Zoltan PHG"); - LO optMinRowsPerProc = 2000; clp.setOption("minRowsPerProc", &optMinRowsPerProc, "min #rows allowable per proc before repartitioning occurs"); - double optNnzImbalance = 
1.2; clp.setOption("nnzImbalance", &optNnzImbalance, "max allowable nonzero imbalance before repartitioning occurs"); + int optRepartition = 1; + clp.setOption("repartition", &optRepartition, "enable repartitioning (0=no repartitioning, 1=Zoltan RCB, 2=Isorropia+Zoltan PHG"); + LO optMinRowsPerProc = 2000; + clp.setOption("minRowsPerProc", &optMinRowsPerProc, "min #rows allowable per proc before repartitioning occurs"); + double optNnzImbalance = 1.2; + clp.setOption("nnzImbalance", &optNnzImbalance, "max allowable nonzero imbalance before repartitioning occurs"); #else int optRepartition = 0; -#endif // HAVE_MPI && HAVE_MUELU_ZOLTAN +#endif // HAVE_MPI && HAVE_MUELU_ZOLTAN // - Solve - int optFixPoint = 1; clp.setOption("fixPoint", &optFixPoint, "apply multigrid as solver"); - int optPrecond = 1; clp.setOption("precond", &optPrecond, "apply multigrid as preconditioner"); - LO optIts = 10; clp.setOption("its", &optIts, "number of multigrid cycles"); - double optTol = 1e-7; clp.setOption("tol", &optTol, "stopping tolerance for Krylov method"); + int optFixPoint = 1; + clp.setOption("fixPoint", &optFixPoint, "apply multigrid as solver"); + int optPrecond = 1; + clp.setOption("precond", &optPrecond, "apply multigrid as preconditioner"); + LO optIts = 10; + clp.setOption("its", &optIts, "number of multigrid cycles"); + double optTol = 1e-7; + clp.setOption("tol", &optTol, "stopping tolerance for Krylov method"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; - case Teuchos::CommandLineProcessor::PARSE_ERROR: - case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + case Teuchos::CommandLineProcessor::PARSE_ERROR: + case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; + case 
Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - RCP globalTimeMonitor = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time"))); + RCP globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time"))); matrixParameters.check(); xpetraParameters.check(); @@ -226,17 +245,15 @@ int main(int argc, char *argv[]) { TimeMonitor tm(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build")); map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm); - Teuchos::RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); //TODO: Matrix vs. CrsMatrixWrap + Teuchos::RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); // TODO: Matrix vs. CrsMatrixWrap A = Pr->BuildMatrix(); if (matrixParameters.GetMatrixType() == "Laplace1D") { coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, matrixParameters.GetParameterList()); - } - else if (matrixParameters.GetMatrixType() == "Laplace2D") { + } else if (matrixParameters.GetMatrixType() == "Laplace2D") { coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, matrixParameters.GetParameterList()); - } - else if (matrixParameters.GetMatrixType() == "Laplace3D") { + } else if (matrixParameters.GetMatrixType() == "Laplace3D") { coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, matrixParameters.GetParameterList()); } } @@ -253,7 +270,7 @@ int main(int argc, char *argv[]) { // USER GUIDE // define near null space RCP nullspace = MultiVectorFactory::Build(map, 1); - nullspace->putScalar( (SC) 1.0); + nullspace->putScalar((SC)1.0); // USER GUIDE // Teuchos::Array::magnitudeType> norms(1); @@ -281,7 +298,7 @@ int main(int argc, char *argv[]) { // USER GUIDE // instantiate new Hierarchy object H = rcp(new Hierarchy()); H->setDefaultVerbLevel(Teuchos::VERB_HIGH); - 
H->SetMaxCoarseSize((GO) optMaxCoarseSize); + H->SetMaxCoarseSize((GO)optMaxCoarseSize); // USER GUIDE // // @@ -291,9 +308,9 @@ int main(int argc, char *argv[]) { // USER GUIDE // create a fine level object RCP Finest = H->GetLevel(); Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH); - Finest->Set("A", A); - Finest->Set("Nullspace", nullspace); - Finest->Set("Coordinates", coordinates); //FIXME: XCoordinates, YCoordinates, .. + Finest->Set("A", A); + Finest->Set("Nullspace", nullspace); + Finest->Set("Coordinates", coordinates); // FIXME: XCoordinates, YCoordinates, .. // USER GUIDE // // @@ -314,20 +331,25 @@ int main(int argc, char *argv[]) { *out << "========================= Aggregate option summary =========================" << std::endl; *out << "min DOFs per aggregate : " << optMinPerAgg << std::endl; *out << "min # of root nbrs already aggregated : " << optMaxNbrSel << std::endl; - AggregationFact->SetMinNodesPerAggregate(optMinPerAgg); //TODO should increase if run anything othpermRFacter than 1D + AggregationFact->SetMinNodesPerAggregate(optMinPerAgg); // TODO should increase if run anything othpermRFacter than 1D AggregationFact->SetMaxNeighAlreadySelected(optMaxNbrSel); std::transform(optAggOrdering.begin(), optAggOrdering.end(), optAggOrdering.begin(), ::tolower); if (optAggOrdering == "natural" || optAggOrdering == "random" || optAggOrdering == "graph") { *out << "aggregate ordering : " << optAggOrdering << std::endl; AggregationFact->SetOrdering(optAggOrdering); } else { - std::string msg = "main: bad aggregation option """ + optAggOrdering + """."; + std::string msg = + "main: bad aggregation option " + "" + + optAggOrdering + + "" + "."; throw(MueLu::Exceptions::RuntimeError(msg)); } - //AggregationFact->SetPhase3AggCreation(0.5); + // AggregationFact->SetPhase3AggCreation(0.5); M.SetFactory("Aggregates", AggregationFact); - *out << "=============================================================================" << std::endl; + *out << 
"=============================================================================" << std::endl; } // @@ -343,7 +365,7 @@ int main(int argc, char *argv[]) { RCP PFact = rcp(new SaPFactory()); PFact->SetParameter("sa: damping factor", ParameterEntry(optSaDamping)); - RCP RFact = rcp(new TransPFactory()); + RCP RFact = rcp(new TransPFactory()); RCP AFact = rcp(new RAPFactory()); AFact->setVerbLevel(Teuchos::VERB_HIGH); @@ -354,7 +376,8 @@ int main(int argc, char *argv[]) { ParameterList Aclist = *(AFact->GetValidParameterList()); Aclist.set("transpose: use implicit", true); AFact->SetParameterList(Aclist); - if (comm->getRank() == 0) std::cout << "\n\n* ***** USING IMPLICIT RESTRICTION OPERATOR ***** *\n" << std::endl; + if (comm->getRank() == 0) std::cout << "\n\n* ***** USING IMPLICIT RESTRICTION OPERATOR ***** *\n" + << std::endl; } // @@ -383,13 +406,12 @@ int main(int argc, char *argv[]) { // Transfer coordinates RCP TransferCoordinatesFact = rcp(new CoordinatesTransferFactory()); - AFact->AddTransferFactory(TransferCoordinatesFact); // FIXME REMOVE + AFact->AddTransferFactory(TransferCoordinatesFact); // FIXME REMOVE // Compute partition (creates "Partition" object) - if(optRepartition == 1) { // use plain Zoltan Interface - - } else if (optRepartition == 2) { // use Isorropia + Zoltan interface + if (optRepartition == 1) { // use plain Zoltan Interface + } else if (optRepartition == 2) { // use Isorropia + Zoltan interface } // Repartitioning (creates "Importer" from "Partition") @@ -402,13 +424,12 @@ int main(int argc, char *argv[]) { } RepartitionFact->SetFactory("A", AFact); - if(optRepartition == 1) { + if (optRepartition == 1) { RCP ZoltanFact = rcp(new ZoltanInterface()); ZoltanFact->SetFactory("A", AFact); ZoltanFact->SetFactory("Coordinates", TransferCoordinatesFact); RepartitionFact->SetFactory("Partition", ZoltanFact); - } - else if(optRepartition == 2) { + } else if (optRepartition == 2) { #if defined(HAVE_MPI) && defined(HAVE_MUELU_ISORROPIA) RCP > 
isoInterface = rcp(new MueLu::IsorropiaInterface()); isoInterface->SetFactory("A", AFact); @@ -421,13 +442,12 @@ int main(int argc, char *argv[]) { #endif } - // Reordering of the transfer operators RCP RebalancedPFact = rcp(new RebalanceTransferFactory()); RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); RebalancedPFact->SetFactory("P", PFact); RebalancedPFact->SetFactory("Coordinates", TransferCoordinatesFact); - RebalancedPFact->SetFactory("Nullspace", M.GetFactory("Ptent")); // TODO + RebalancedPFact->SetFactory("Nullspace", M.GetFactory("Ptent")); // TODO RCP RebalancedRFact = rcp(new RebalanceTransferFactory()); RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); @@ -441,16 +461,16 @@ int main(int argc, char *argv[]) { M.SetFactory("A", RebalancedAFact); M.SetFactory("P", RebalancedPFact); M.SetFactory("R", RebalancedRFact); - M.SetFactory("Nullspace", RebalancedPFact); + M.SetFactory("Nullspace", RebalancedPFact); M.SetFactory("Coordinates", RebalancedPFact); - M.SetFactory("Importer", RepartitionFact); + M.SetFactory("Importer", RepartitionFact); #else TEUCHOS_TEST_FOR_EXCEPT(true); #endif - } // optRepartition + } // optRepartition - } // Transfer + } // Transfer // // Smoothers @@ -460,8 +480,8 @@ int main(int argc, char *argv[]) { // USER GUIDE // define smoother object std::string ifpackType; Teuchos::ParameterList ifpackList; - ifpackList.set("relaxation: sweeps", (LO) optSweeps); - ifpackList.set("relaxation: damping factor", (SC) 1.0); + ifpackList.set("relaxation: sweeps", (LO)optSweeps); + ifpackList.set("relaxation: damping factor", (SC)1.0); if (optSmooType == "sgs") { ifpackType = "RELAXATION"; ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel"); @@ -473,16 +493,14 @@ int main(int argc, char *argv[]) { ifpackList.set("relaxation: use l1", true); } else if (optSmooType == "cheby") { ifpackType = "CHEBYSHEV"; - ifpackList.set("chebyshev: degree", (LO) 
optSweeps); + ifpackList.set("chebyshev: degree", (LO)optSweeps); if (matrixParameters.GetMatrixType() == "Laplace1D") { - ifpackList.set("chebyshev: ratio eigenvalue", (SC) 3); - } - else if (matrixParameters.GetMatrixType() == "Laplace2D") { - ifpackList.set("chebyshev: ratio eigenvalue", (SC) 7); - } - else if (matrixParameters.GetMatrixType() == "Laplace3D") { - ifpackList.set("chebyshev: ratio eigenvalue", (SC) 20); + ifpackList.set("chebyshev: ratio eigenvalue", (SC)3); + } else if (matrixParameters.GetMatrixType() == "Laplace2D") { + ifpackList.set("chebyshev: ratio eigenvalue", (SC)7); + } else if (matrixParameters.GetMatrixType() == "Laplace3D") { + ifpackList.set("chebyshev: ratio eigenvalue", (SC)20); } // ifpackList.set("chebyshev: max eigenvalue", (double) -1.0); // ifpackList.set("chebyshev: min eigenvalue", (double) 1.0); @@ -503,7 +521,7 @@ int main(int argc, char *argv[]) { H->Setup(M, startLevel, optMaxLevels); // USER GUIDE // - } // end of Setup TimeMonitor + } // end of Setup TimeMonitor /*{ // some debug output // print out content of levels @@ -529,7 +547,7 @@ int main(int argc, char *argv[]) { X->randomize(); A->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0); B->norm2(norms); - B->scale(1.0/norms[0]); + B->scale(1.0 / norms[0]); // USER GUIDE // // @@ -537,15 +555,14 @@ int main(int argc, char *argv[]) { // if (optFixPoint) { - - X->putScalar( (SC) 0.0); + X->putScalar((SC)0.0); TimeMonitor tm(*TimeMonitor::getNewTimer("ScalingTest: 3 - Fixed Point Solve")); H->IsPreconditioner(false); H->Iterate(*B, *X, optIts); - } // optFixedPt + } // optFixedPt // // Use AMG as a preconditioner in Belos @@ -554,27 +571,27 @@ int main(int argc, char *argv[]) { #ifdef HAVE_MUELU_BELOS if (optPrecond) { - RCP tm; - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve"))); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve"))); // USER GUIDE // Operator and Multivector type that will be used 
with Belos - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; H->IsPreconditioner(true); // Define Operator and Preconditioner - Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Operator object into a Belos operator - Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Operator object into a Belos operator + Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP< Belos::LinearProblem > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); belosProblem->setLeftPrec(belosPrec); bool set = belosProblem->setProblem(); if (set == false) { if (comm->getRank() == 0) - std::cout << std::endl << "ERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; + std::cout << std::endl + << "ERROR: Belos::LinearProblem failed to set up correctly!" 
<< std::endl; return EXIT_FAILURE; } // USER GUIDE // @@ -582,15 +599,15 @@ int main(int argc, char *argv[]) { // USER GUIDE // Belos parameter list int maxIts = 100; Teuchos::ParameterList belosList; - belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", optTol); // Relative convergence tolerance requested - //belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails); + belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", optTol); // Relative convergence tolerance requested + // belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails); belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); belosList.set("Output Frequency", 1); belosList.set("Output Style", Belos::Brief); // Create an iterative solver manager - RCP< Belos::SolverManager > solver = rcp(new Belos::BlockCGSolMgr(belosProblem, rcp(&belosList, false))); + RCP > solver = rcp(new Belos::BlockCGSolMgr(belosProblem, rcp(&belosList, false))); // USER GUIDE // // Perform solve @@ -601,7 +618,7 @@ int main(int argc, char *argv[]) { // USER GUIDE // solve linear system ret = solver->solve(); // USER GUIDE // - } // end of TimeMonitor + } // end of TimeMonitor // Get the number of iterations for this solve. if (comm->getRank() == 0) @@ -609,49 +626,53 @@ int main(int argc, char *argv[]) { // Compute actual residuals. int numrhs = 1; - std::vector actual_resids( numrhs ); //TODO: double? - std::vector rhs_norm( numrhs ); + std::vector actual_resids(numrhs); // TODO: double? 
+ std::vector rhs_norm(numrhs); RCP resid = MultiVectorFactory::Build(map, numrhs); - typedef Belos::OperatorTraits OPT; - typedef Belos::MultiVecTraits MVT; - - OPT::Apply( *belosOp, *X, *resid ); - MVT::MvAddMv( -1.0, *resid, 1.0, *B, *resid ); - MVT::MvNorm( *resid, actual_resids ); - MVT::MvNorm( *B, rhs_norm ); - *out<< "---------- Actual Residuals (normalized) ----------"< tol) { badRes = true; } + typedef Belos::OperatorTraits OPT; + typedef Belos::MultiVecTraits MVT; + + OPT::Apply(*belosOp, *X, *resid); + MVT::MvAddMv(-1.0, *resid, 1.0, *B, *resid); + MVT::MvNorm(*resid, actual_resids); + MVT::MvNorm(*B, rhs_norm); + *out << "---------- Actual Residuals (normalized) ----------" << std::endl + << std::endl; + for (int i = 0; i < numrhs; i++) { + double actRes = actual_resids[i] / rhs_norm[i]; + *out << "Problem " << i << " : \t" << actRes << std::endl; + // if (actRes > tol) { badRes = true; } } - } //try + } // try - catch(...) { + catch (...) { if (comm->getRank() == 0) - std::cout << std::endl << "ERROR: Belos threw an error! " << std::endl; + std::cout << std::endl + << "ERROR: Belos threw an error! " << std::endl; } // USER GUIDE // Check convergence if (ret != Belos::Converged) { - if (comm->getRank() == 0) std::cout << std::endl << "ERROR: Belos did not converge! " << std::endl; + if (comm->getRank() == 0) std::cout << std::endl + << "ERROR: Belos did not converge! " << std::endl; } else { - if (comm->getRank() == 0) std::cout << std::endl << "SUCCESS: Belos converged!" << std::endl; + if (comm->getRank() == 0) std::cout << std::endl + << "SUCCESS: Belos converged!" 
<< std::endl; } // USER GUIDE // tm = Teuchos::null; - } //if (optPrecond) + } // if (optPrecond) -#endif // HAVE_MUELU_BELOS +#endif // HAVE_MUELU_BELOS // // Timer final summaries // - globalTimeMonitor = Teuchos::null; // stop this timer before summary + globalTimeMonitor = Teuchos::null; // stop this timer before summary if (optTimings) TimeMonitor::summarize(); diff --git a/packages/muelu/doc/Tutorial/src/ScalingTestParamList.cpp b/packages/muelu/doc/Tutorial/src/ScalingTestParamList.cpp index bf6be6ec1e0d..8285963777d5 100644 --- a/packages/muelu/doc/Tutorial/src/ScalingTestParamList.cpp +++ b/packages/muelu/doc/Tutorial/src/ScalingTestParamList.cpp @@ -49,7 +49,7 @@ #include -#include // For Epetra only runs this points to FakeKokkos in Xpetra +#include // For Epetra only runs this points to FakeKokkos in Xpetra #include "Xpetra_ConfigDefs.hpp" #include @@ -65,7 +65,7 @@ #include #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include @@ -77,8 +77,8 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp #endif // Define default data types @@ -87,20 +87,20 @@ typedef int LocalOrdinal; typedef int GlobalOrdinal; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { #include + using Teuchos::ArrayRCP; + using Teuchos::ParameterList; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; - using Teuchos::ParameterList; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = 
Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // ========================================================================= // Convenient definitions @@ -114,28 +114,40 @@ int main(int argc, char *argv[]) { Teuchos::CommandLineProcessor clp(false); GO nx = 100, ny = 100, nz = 100; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = "scalingTest.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'scalingTest.xml']"); - bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); - int writeMatricesOPT = -2; clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); - std::string dsolveType = "cg", solveType; clp.setOption("solver", &dsolveType, "solve type: (none | cg | gmres | standalone)"); - double dtol = 1e-12, tol; clp.setOption("tol", &dtol, "solver convergence tolerance"); - - std::string mapFile; clp.setOption("map", &mapFile, "map data file"); - std::string matrixFile; clp.setOption("matrix", &matrixFile, "matrix data file"); - std::string coordFile; clp.setOption("coords", &coordFile, "coordinates data file"); - std::string nullFile; clp.setOption("nullspace", &nullFile, "nullspace data file"); - int numRebuilds = 0; clp.setOption("rebuild", &numRebuilds, "#times to rebuild hierarchy"); - int maxIts = 200; clp.setOption("its", &maxIts, "maximum number of solver iterations"); - bool scaleResidualHistory = true; clp.setOption("scale", "noscale", &scaleResidualHistory, "scaled Krylov residual history"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = "scalingTest.xml"; + 
clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'scalingTest.xml']"); + bool printTimings = true; + clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); + int writeMatricesOPT = -2; + clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); + std::string dsolveType = "cg", solveType; + clp.setOption("solver", &dsolveType, "solve type: (none | cg | gmres | standalone)"); + double dtol = 1e-12, tol; + clp.setOption("tol", &dtol, "solver convergence tolerance"); + + std::string mapFile; + clp.setOption("map", &mapFile, "map data file"); + std::string matrixFile; + clp.setOption("matrix", &matrixFile, "matrix data file"); + std::string coordFile; + clp.setOption("coords", &coordFile, "coordinates data file"); + std::string nullFile; + clp.setOption("nullspace", &nullFile, "nullspace data file"); + int numRebuilds = 0; + clp.setOption("rebuild", &numRebuilds, "#times to rebuild hierarchy"); + int maxIts = 200; + clp.setOption("its", &maxIts, "maximum number of solver iterations"); + bool scaleResidualHistory = true; + clp.setOption("scale", "noscale", &scaleResidualHistory, "scaled Krylov residual history"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); @@ -166,12 +178,13 @@ int main(int argc, char *argv[]) { RCP globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time"))); RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build"))); - RCP A; - RCP 
map; + RCP A; + RCP map; RCP coordinates; RCP nullspace; if (matrixFile.empty()) { - galeriStream << "========================================================\n" << xpetraParameters << galeriParameters; + galeriStream << "========================================================\n" + << xpetraParameters << galeriParameters; // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g., // d1 d2 d3 @@ -188,24 +201,24 @@ int main(int argc, char *argv[]) { // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside if (matrixType == "Laplace1D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); + 
coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); } // Expand map to do multiple DOF per node for block problems if (matrixType == "Elasticity2D") - map = Xpetra::MapFactory::Build(map, 2); + map = Xpetra::MapFactory::Build(map, 2); if (matrixType == "Elasticity3D") - map = Xpetra::MapFactory::Build(map, 3); + map = Xpetra::MapFactory::Build(map, 3); galeriStream << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl @@ -214,15 +227,15 @@ int main(int argc, char *argv[]) { if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet - galeriList.set("right boundary" , "Neumann"); + galeriList.set("right boundary", "Neumann"); galeriList.set("bottom boundary", "Neumann"); - galeriList.set("top boundary" , "Neumann"); - galeriList.set("front boundary" , "Neumann"); - galeriList.set("back boundary" , "Neumann"); + galeriList.set("top boundary", "Neumann"); + galeriList.set("front boundary", "Neumann"); + galeriList.set("back boundary", "Neumann"); } - RCP > Pr = - Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), map, galeriList); + RCP > Pr = + Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), map, galeriList); A = Pr->BuildMatrix(); if (matrixType == "Elasticity2D" || @@ -243,7 +256,7 @@ int main(int argc, char *argv[]) { // Tpetra matrix reader is still broken, so instead we read in // a matrix in a binary format and then redistribute it const bool binaryFormat = true; - A = Utils::Read(matrixFile, lib, comm, binaryFormat); + A = Utils::Read(matrixFile, lib, comm, binaryFormat); if (!map.is_null()) { RCP newMatrix = MatrixFactory::Build(map, 1); @@ -280,11 +293,11 @@ int main(int argc, char *argv[]) { bool stop = false; if (isDriver) { - runList = 
paramList.sublist("Run1", mustAlreadyExist); - mueluList = runList .sublist("MueLu", mustAlreadyExist); + runList = paramList.sublist("Run1", mustAlreadyExist); + mueluList = runList.sublist("MueLu", mustAlreadyExist); } else { mueluList = paramList; - stop = true; + stop = true; } if (nullspace.is_null()) { @@ -303,13 +316,13 @@ int main(int argc, char *argv[]) { nullspace = MultiVectorFactory::Build(map, blkSize); for (int i = 0; i < blkSize; i++) { RCP domainMap = A->getDomainMap(); - GO indexBase = domainMap->getIndexBase(); + GO indexBase = domainMap->getIndexBase(); ArrayRCP nsData = nullspace->getDataNonConst(i); for (int j = 0; j < nsData.size(); j++) { GO GID = domainMap->getGlobalElement(j) - indexBase; - if ((GID-i) % blkSize == 0) + if ((GID - i) % blkSize == 0) nsData[j] = Teuchos::ScalarTraits::one(); } } @@ -322,7 +335,7 @@ int main(int argc, char *argv[]) { solveType = dsolveType; tol = dtol; - int savedOut = -1; + int savedOut = -1; FILE* openedOut = NULL; if (isDriver) { if (runList.isParameter("filename")) { @@ -339,12 +352,12 @@ int main(int argc, char *argv[]) { dup2(fileno(openedOut), STDOUT_FILENO); } if (runList.isParameter("solver")) solveType = runList.get("solver"); - if (runList.isParameter("tol")) tol = runList.get ("tol"); + if (runList.isParameter("tol")) tol = runList.get("tol"); } // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); out << galeriStream.str(); @@ -366,12 +379,12 @@ int main(int argc, char *argv[]) { A->SetMaxEigenvalueEstimate(-one); //============================================ SPLIT H = mueLuFactory->CreateHierarchy(); - H->GetLevel(0)->Set("A", A); - H->GetLevel(0)->Set("Nullspace", nullspace); + H->GetLevel(0)->Set("A", A); + H->GetLevel(0)->Set("Nullspace", nullspace); if (!coordinates.is_null()) 
H->GetLevel(0)->Set("Coordinates", coordinates); mueLuFactory->SetupHierarchy(*H); - //============================================ SPLIT + //============================================ SPLIT } comm->barrier(); @@ -394,7 +407,7 @@ int main(int argc, char *argv[]) { Teuchos::Array norms(1); B->norm2(norms); - B->scale(one/norms[0]); + B->scale(one / norms[0]); X->putScalar(zero); } tm = Teuchos::null; @@ -410,7 +423,7 @@ int main(int argc, char *argv[]) { // Do not perform a solve } else if (solveType == "standalone") { - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Fixed Point Solve"))); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Fixed Point Solve"))); H->IsPreconditioner(false); H->Iterate(*B, *X, maxIts); @@ -420,17 +433,17 @@ int main(int argc, char *argv[]) { tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve"))); // Operator and Multivector type that will be used with Belos - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; H->IsPreconditioner(true); // Define Operator and Preconditioner - Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator - Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp (H)); // Turns a MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator + Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP< Belos::LinearProblem > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); belosProblem->setRightPrec(belosPrec); bool set = belosProblem->setProblem(); @@ -441,18 +454,18 @@ int main(int argc, char *argv[]) { // Belos parameter list Teuchos::ParameterList belosList; - 
belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested - belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList.set("Output Frequency", 1); - belosList.set("Output Style", Belos::Brief); + belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested + belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", Belos::Brief); if (!scaleResidualHistory) belosList.set("Implicit Residual Scaling", "None"); // Create an iterative solver manager - RCP< Belos::SolverManager > solver; + RCP > solver; if (solveType == "cg") { - solver = rcp(new Belos::PseudoBlockCGSolMgr (belosProblem, rcp(&belosList, false))); + solver = rcp(new Belos::PseudoBlockCGSolMgr(belosProblem, rcp(&belosList, false))); } else if (solveType == "gmres") { solver = rcp(new Belos::BlockGmresSolMgr(belosProblem, rcp(&belosList, false))); } @@ -465,28 +478,30 @@ int main(int argc, char *argv[]) { // Get the number of iterations for this solve. out << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; - } - catch(const std::exception& ex) - { - out << std::endl << "ERROR: Belos threw an error! The exception message is:" << std::endl; + } catch (const std::exception& ex) { + out << std::endl + << "ERROR: Belos threw an error! The exception message is:" << std::endl; std::cout << ex.what() << std::endl; } - - catch(...) { - out << std::endl << "ERROR: Belos threw an unknown error! " << std::endl; + + catch (...) { + out << std::endl + << "ERROR: Belos threw an unknown error! " << std::endl; } // Check convergence if (ret != Belos::Converged) - out << std::endl << "ERROR: Belos did not converge! 
" << std::endl; + out << std::endl + << "ERROR: Belos did not converge! " << std::endl; else - out << std::endl << "SUCCESS: Belos converged!" << std::endl; -#endif //ifdef HAVE_MUELU_BELOS + out << std::endl + << "SUCCESS: Belos converged!" << std::endl; +#endif // ifdef HAVE_MUELU_BELOS } else { throw MueLu::Exceptions::RuntimeError("Unknown solver type: \"" + solveType + "\""); } comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings) @@ -502,7 +517,7 @@ int main(int argc, char *argv[]) { } try { runList = paramList.sublist("Run" + MueLu::toString(++runCount), mustAlreadyExist); - mueluList = runList .sublist("MueLu", mustAlreadyExist); + mueluList = runList.sublist("MueLu", mustAlreadyExist); } catch (std::exception) { stop = true; } @@ -511,6 +526,5 @@ int main(int argc, char *argv[]) { } while (stop == false); } - return 0; -} //main +} // main diff --git a/packages/muelu/doc/Tutorial/src/laplace2d.cpp b/packages/muelu/doc/Tutorial/src/laplace2d.cpp index 93f2cdad2302..6d7ae40be5fc 100644 --- a/packages/muelu/doc/Tutorial/src/laplace2d.cpp +++ b/packages/muelu/doc/Tutorial/src/laplace2d.cpp @@ -73,7 +73,7 @@ #include #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include @@ -95,11 +95,11 @@ typedef int GlobalOrdinal; typedef Xpetra::EpetraNode Node; #endif -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { #if defined(HAVE_MUELU_EPETRA) #include - using Teuchos::RCP; // reference count pointers + using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; @@ -108,9 +108,9 @@ int main(int argc, char *argv[]) { bool success = false; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int MyPID = comm->getRank(); - int NumProc = comm->getSize(); + RCP > comm = Teuchos::DefaultComm::getComm(); + int MyPID = comm->getRank(); + int NumProc = comm->getSize(); const Teuchos::RCP 
epComm = Teuchos::rcp_const_cast(Xpetra::toEpetra(comm)); @@ -118,33 +118,38 @@ int main(int argc, char *argv[]) { // ================================ // Convenient definitions // ================================ - //SC zero = Teuchos::ScalarTraits::zero(); + // SC zero = Teuchos::ScalarTraits::zero(); SC one = Teuchos::ScalarTraits::one(); // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& fancyout = *fancy; + Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); - - // ================================ // Parameters initialization // ================================ Teuchos::CommandLineProcessor clp(false); - GO nx = 100; clp.setOption("nx", &nx, "mesh size in x direction"); - GO ny = 100; clp.setOption("ny", &ny, "mesh size in y direction"); - std::string xmlFileName = "xml/s2a.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file"); - int mgridSweeps = 1; clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver."); - std::string printTimings = "no"; clp.setOption("timings", &printTimings, "print timings to screen [yes/no]"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - int importOldData = 0; clp.setOption("importOldData", &importOldData, "import map and matrix from previous run (highly experimental)."); - - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + GO nx = 100; + clp.setOption("nx", &nx, "mesh size in x direction"); + GO ny = 100; + clp.setOption("ny", &ny, "mesh size in y direction"); + std::string xmlFileName = "xml/s2a.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file"); + int mgridSweeps = 1; + clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver."); + std::string printTimings = "no"; + 
clp.setOption("timings", &printTimings, "print timings to screen [yes/no]"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + int importOldData = 0; + clp.setOption("importOldData", &importOldData, "import map and matrix from previous run (highly experimental)."); + + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // ================================ @@ -160,14 +165,14 @@ int main(int argc, char *argv[]) { GaleriList.set("ny", ny); GaleriList.set("mx", epComm->NumProc()); GaleriList.set("my", 1); - GaleriList.set("lx", 1.0); // length of x-axis - GaleriList.set("ly", 1.0); // length of y-axis + GaleriList.set("lx", 1.0); // length of x-axis + GaleriList.set("ly", 1.0); // length of y-axis - Teuchos::RCP epMap = Teuchos::null; + Teuchos::RCP epMap = Teuchos::null; Teuchos::RCP epCoord = Teuchos::null; - Teuchos::RCP epA = Teuchos::null; + Teuchos::RCP epA = Teuchos::null; - if(importOldData==0) { + if (importOldData == 0) { // TUTORIALSPLIT =========================================================== // create map epMap = Teuchos::rcp(Galeri::CreateMap("Cartesian2D", *epComm, GaleriList)); @@ -178,8 +183,9 @@ int main(int argc, char *argv[]) { // create matrix epA = Teuchos::rcp(Galeri::CreateCrsMatrix("Laplace2D", epMap.get(), GaleriList)); - double hx = 1./(nx-1); double hy = 1./(ny-1); - epA->Scale(1./(hx*hy)); + double hx = 1. / (nx - 1); + double hy = 1. / (ny - 1); + epA->Scale(1. 
/ (hx * hy)); // TUTORIALSPLIT =========================================================== } else { std::cout << "Import old data" << std::endl; @@ -192,14 +198,14 @@ int main(int argc, char *argv[]) { epCoord = Teuchos::rcp(myEpVector); comm->barrier(); Epetra_CrsMatrix* myEpMatrix; - EpetraExt::MatrixMarketFileToCrsMatrix("A.mat",*(Xpetra::toEpetra(comm)), myEpMatrix); + EpetraExt::MatrixMarketFileToCrsMatrix("A.mat", *(Xpetra::toEpetra(comm)), myEpMatrix); epA = Teuchos::rcp(myEpMatrix); comm->barrier(); } // TUTORIALSPLIT =========================================================== // Epetra -> Xpetra - Teuchos::RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); + Teuchos::RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); Teuchos::RCP exAWrap = Teuchos::rcp(new CrsMatrixWrap(exA)); RCP A = Teuchos::rcp_dynamic_cast(exAWrap); @@ -213,16 +219,16 @@ int main(int argc, char *argv[]) { X->PutScalar(0.0); // Epetra -> Xpetra - RCP xB = Teuchos::rcp(new Xpetra::EpetraVectorT(B)); - RCP xX = Teuchos::rcp(new Xpetra::EpetraVectorT(X)); - RCP coords = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epCoord)); + RCP xB = Teuchos::rcp(new Xpetra::EpetraVectorT(B)); + RCP xX = Teuchos::rcp(new Xpetra::EpetraVectorT(X)); + RCP coords = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epCoord)); xX->setSeed(100); xX->randomize(); // TUTORIALSPLIT =========================================================== // build null space vector - RCP map = A->getRowMap(); + RCP map = A->getRowMap(); RCP nullspace = MultiVectorFactory::Build(map, 1); nullspace->putScalar(one); @@ -248,8 +254,8 @@ int main(int argc, char *argv[]) { // TUTORIALSPLIT =========================================================== RCP H = mueLuFactory.CreateHierarchy(); - H->GetLevel(0)->Set("A", A); - H->GetLevel(0)->Set("Nullspace", nullspace); + H->GetLevel(0)->Set("A", A); + H->GetLevel(0)->Set("Nullspace", nullspace); H->GetLevel(0)->Set("Coordinates", coords); mueLuFactory.SetupHierarchy(*H); @@ 
-270,7 +276,7 @@ int main(int argc, char *argv[]) { fancyout << "========================================================\nCalculate exact solution." << std::endl; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - direct solve"))); exactLsgVec->PutScalar(0.0); - exactLsgVec->Update(1.0,*X,1.0); + exactLsgVec->Update(1.0, *X, 1.0); Epetra_LinearProblem epetraProblem(epA.get(), exactLsgVec.get(), B.get()); Amesos amesosFactory; @@ -295,7 +301,7 @@ int main(int argc, char *argv[]) { // TUTORIALSPLIT =========================================================== precLsgVec->PutScalar(0.0); - precLsgVec->Update(1.0,*X,1.0); + precLsgVec->Update(1.0, *X, 1.0); Epetra_LinearProblem epetraProblem(epA.get(), precLsgVec.get(), B.get()); AztecOO aztecSolver(epetraProblem); @@ -319,8 +325,8 @@ int main(int argc, char *argv[]) { mgridLsgVec->putScalar(0.0); { fancyout << "========================================================\nUse multigrid hierarchy as solver." << std::endl; - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve"))); - mgridLsgVec->update(1.0,*xX,1.0); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve"))); + mgridLsgVec->update(1.0, *xX, 1.0); H->IsPreconditioner(false); H->Iterate(*xB, *mgridLsgVec, mgridSweeps); comm->barrier(); @@ -331,68 +337,65 @@ int main(int argc, char *argv[]) { fancyout << "========================================================\nExport results.\n========================================================" << std::endl; std::ofstream myfile; - std::stringstream ss; ss << "example" << MyPID << ".txt"; - myfile.open (ss.str().c_str()); + std::stringstream ss; + ss << "example" << MyPID << ".txt"; + myfile.open(ss.str().c_str()); ////////////////// // loop over all procs - for (int iproc=0; iproc < NumProc; iproc++) { - if (MyPID==iproc) { - int NumVectors1 = 2; - int NumMyElements1 = epCoord->Map(). 
NumMyElements(); - int MaxElementSize1 = epCoord->Map().MaxElementSize(); - int * FirstPointInElementList1 = NULL; - if (MaxElementSize1!=1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList(); - double ** A_Pointers = epCoord->Pointers(); - - if (MyPID==0) { + for (int iproc = 0; iproc < NumProc; iproc++) { + if (MyPID == iproc) { + int NumVectors1 = 2; + int NumMyElements1 = epCoord->Map().NumMyElements(); + int MaxElementSize1 = epCoord->Map().MaxElementSize(); + int* FirstPointInElementList1 = NULL; + if (MaxElementSize1 != 1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList(); + double** A_Pointers = epCoord->Pointers(); + + if (MyPID == 0) { myfile.width(8); - myfile << "# MyPID"; myfile << " "; + myfile << "# MyPID"; + myfile << " "; myfile.width(12); - if (MaxElementSize1==1) - myfile << "GID "; + if (MaxElementSize1 == 1) + myfile << "GID "; else - myfile << " GID/Point"; - for (int j = 0; j < NumVectors1 ; j++) - { + myfile << " GID/Point"; + for (int j = 0; j < NumVectors1; j++) { myfile.width(20); - myfile << "Value "; + myfile << "Value "; } myfile << std::endl; } - for (int i=0; i < NumMyElements1; i++) { - for (int ii=0; ii< epCoord->Map().ElementSize(i); ii++) { + for (int i = 0; i < NumMyElements1; i++) { + for (int ii = 0; ii < epCoord->Map().ElementSize(i); ii++) { int iii; myfile.width(10); - myfile << MyPID; myfile << " "; + myfile << MyPID; + myfile << " "; myfile.width(10); - if (MaxElementSize1==1) { - if(epCoord->Map().GlobalIndicesInt()) - { - int * MyGlobalElements1 = epCoord->Map().MyGlobalElements(); + if (MaxElementSize1 == 1) { + if (epCoord->Map().GlobalIndicesInt()) { + int* MyGlobalElements1 = epCoord->Map().MyGlobalElements(); myfile << MyGlobalElements1[i] << " "; } iii = i; - } - else { - if(epCoord->Map().GlobalIndicesInt()) - { - - int * MyGlobalElements1 = epCoord->Map().MyGlobalElements(); - myfile << MyGlobalElements1[i]<< "/" << ii << " "; + } else { + if (epCoord->Map().GlobalIndicesInt()) 
{ + int* MyGlobalElements1 = epCoord->Map().MyGlobalElements(); + myfile << MyGlobalElements1[i] << "/" << ii << " "; } - iii = FirstPointInElementList1[i]+ii; + iii = FirstPointInElementList1[i] + ii; } - for (int j = 0; j < NumVectors1 ; j++) - { + for (int j = 0; j < NumVectors1; j++) { myfile.width(20); - myfile << A_Pointers[j][iii]; + myfile << A_Pointers[j][iii]; } - myfile.precision(18); // set high precision for output + myfile.precision(18); // set high precision for output // add solution vector entry myfile.width(25); @@ -406,10 +409,10 @@ int main(int argc, char *argv[]) { Teuchos::ArrayRCP mgridLsgVecData = mgridLsgVec->getDataNonConst(0); myfile << mgridLsgVecData[iii]; - myfile.precision(6); // set default precision + myfile.precision(6); // set default precision myfile << std::endl; } - } // end loop over all lines on current proc + } // end loop over all lines on current proc myfile << std::flush; // syncronize procs @@ -417,19 +420,19 @@ int main(int argc, char *argv[]) { comm->barrier(); comm->barrier(); - } // end myProc + } // end myProc } // export map - RCP Amap = A->getRowMap(); - RCP > epAmap = Teuchos::rcp_dynamic_cast >(Amap); + RCP Amap = A->getRowMap(); + RCP > epAmap = Teuchos::rcp_dynamic_cast >(Amap); - //Epetra_Map* eMap; - //int rv = EpetraExt::MatrixMarketFileToMap(fileName.c_str(), *(Xpetra::toEpetra(comm)), eMap); - EpetraExt::BlockMapToMatrixMarketFile( "ARowMap.mat", epAmap->getEpetra_BlockMap(), - "ARowMap", - "Row map of matrix A", - true); + // Epetra_Map* eMap; + // int rv = EpetraExt::MatrixMarketFileToMap(fileName.c_str(), *(Xpetra::toEpetra(comm)), eMap); + EpetraExt::BlockMapToMatrixMarketFile("ARowMap.mat", epAmap->getEpetra_BlockMap(), + "ARowMap", + "Row map of matrix A", + true); EpetraExt::MultiVectorToMatrixMarketFile("ACoordVector.mat", *epCoord, "Coordinate multi vector", "Multi vector with mesh coordinates", true); EpetraExt::RowMatrixToMatrixMarketFile("A.mat", *epA, "A matrix", "Matrix A", true); @@ -438,7 
+441,7 @@ int main(int argc, char *argv[]) { myfile.close(); comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings == "yes") { @@ -449,8 +452,8 @@ int main(int argc, char *argv[]) { } TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? EXIT_SUCCESS : EXIT_FAILURE); #else return EXIT_SUCCESS; -#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) -} //main +#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) +} // main diff --git a/packages/muelu/doc/Tutorial/src/recirc2d.cpp b/packages/muelu/doc/Tutorial/src/recirc2d.cpp index 92f6d08b47a5..735da741555e 100644 --- a/packages/muelu/doc/Tutorial/src/recirc2d.cpp +++ b/packages/muelu/doc/Tutorial/src/recirc2d.cpp @@ -65,7 +65,7 @@ #include #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include @@ -84,14 +84,14 @@ typedef double Scalar; typedef int LocalOrdinal; typedef int GlobalOrdinal; -typedef Xpetra::EpetraNode Node; // Epetra needs SerialNode +typedef Xpetra::EpetraNode Node; // Epetra needs SerialNode #endif -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { #if defined(HAVE_MUELU_EPETRA) #include - using Teuchos::RCP; // reference count pointers + using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; @@ -103,41 +103,43 @@ int main(int argc, char *argv[]) { bool success = false; bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int MyPID = comm->getRank(); - int NumProc = comm->getSize(); + RCP > comm = Teuchos::DefaultComm::getComm(); + int MyPID = comm->getRank(); + int NumProc = comm->getSize(); const Teuchos::RCP epComm = Teuchos::rcp_const_cast(Xpetra::toEpetra(comm)); // ========================================================================= // Convenient definitions // 
========================================================================= - //SC zero = Teuchos::ScalarTraits::zero(), one = Teuchos::ScalarTraits::one(); + // SC zero = Teuchos::ScalarTraits::zero(), one = Teuchos::ScalarTraits::one(); // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& fancyout = *fancy; + Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); - - // ========================================================================= // Parameters initialization // ========================================================================= Teuchos::CommandLineProcessor clp(false); GO nx = 100, ny = 100; - clp.setOption("nx", &nx, "mesh size in x direction"); - clp.setOption("ny", &ny, "mesh size in y direction"); - std::string xmlFileName = "xml/s3a.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'tutorial1a.xml'"); - int mgridSweeps = 1; clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver."); - std::string printTimings = "no"; clp.setOption("timings", &printTimings, "print timings to screen [yes/no]"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + clp.setOption("nx", &nx, "mesh size in x direction"); + clp.setOption("ny", &ny, "mesh size in y direction"); + std::string xmlFileName = "xml/s3a.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default 'tutorial1a.xml'"); + int mgridSweeps = 1; + clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver."); + std::string printTimings = "no"; + clp.setOption("timings", &printTimings, "print timings to screen [yes/no]"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // ========================================================================= @@ -153,8 +155,8 @@ int main(int argc, char *argv[]) { GaleriList.set("ny", ny); GaleriList.set("mx", epComm->NumProc()); GaleriList.set("my", 1); - GaleriList.set("lx", 1.0); // length of x-axis - GaleriList.set("ly", 1.0); // length of y-axis + GaleriList.set("lx", 1.0); // length of x-axis + GaleriList.set("ly", 1.0); // length of y-axis GaleriList.set("diff", 1e-5); GaleriList.set("conv", 1.0); @@ -168,11 +170,11 @@ int main(int argc, char *argv[]) { Teuchos::RCP epA = Teuchos::rcp(Galeri::CreateCrsMatrix("Recirc2D", epMap.get(), GaleriList)); // Epetra -> Xpetra - Teuchos::RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); + Teuchos::RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); Teuchos::RCP exAWrap = Teuchos::rcp(new CrsMatrixWrap(exA)); RCP A = Teuchos::rcp_dynamic_cast(exAWrap); - int numPDEs = 1; + int numPDEs = 1; A->SetFixedBlockSize(numPDEs); // set rhs and solution vector @@ -182,22 +184,22 @@ int main(int argc, char *argv[]) { X->PutScalar(0.0); // Epetra -> Xpetra - RCP xB = Teuchos::rcp(new Xpetra::EpetraVectorT(B)); - RCP xX = Teuchos::rcp(new Xpetra::EpetraVectorT(X)); - RCP coords = 
Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epCoord)); + RCP xB = Teuchos::rcp(new Xpetra::EpetraVectorT(B)); + RCP xX = Teuchos::rcp(new Xpetra::EpetraVectorT(X)); + RCP coords = Teuchos::rcp(new Xpetra::EpetraMultiVectorT(epCoord)); xX->setSeed(100); xX->randomize(); // build null space vector - RCP map = A->getRowMap(); + RCP map = A->getRowMap(); RCP nullspace = MultiVectorFactory::Build(map, numPDEs); - for (int i=0; i nsValues = nullspace->getDataNonConst(i); - int numBlocks = nsValues.size() / numPDEs; - for (int j=0; j< numBlocks; ++j) { - nsValues[j*numPDEs + i] = 1.0; + int numBlocks = nsValues.size() / numPDEs; + for (int j = 0; j < numBlocks; ++j) { + nsValues[j * numPDEs + i] = 1.0; } } @@ -219,8 +221,8 @@ int main(int argc, char *argv[]) { RCP H = mueLuFactory.CreateHierarchy(); - H->GetLevel(0)->Set("A", A); - H->GetLevel(0)->Set("Nullspace", nullspace); + H->GetLevel(0)->Set("A", A); + H->GetLevel(0)->Set("Nullspace", nullspace); H->GetLevel(0)->Set("Coordinates", coords); mueLuFactory.SetupHierarchy(*H); @@ -240,7 +242,7 @@ int main(int argc, char *argv[]) { fancyout << "========================================================\nCalculate exact solution." 
<< std::endl; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - direct solve"))); exactLsgVec->PutScalar(0.0); - exactLsgVec->Update(1.0,*X,1.0); + exactLsgVec->Update(1.0, *X, 1.0); Epetra_LinearProblem epetraProblem(epA.get(), exactLsgVec.get(), B.get()); Amesos amesosFactory; @@ -263,7 +265,7 @@ int main(int argc, char *argv[]) { tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - AMG as preconditioner"))); precLsgVec->PutScalar(0.0); - precLsgVec->Update(1.0,*X,1.0); + precLsgVec->Update(1.0, *X, 1.0); Epetra_LinearProblem epetraProblem(epA.get(), precLsgVec.get(), B.get()); AztecOO aztecSolver(epetraProblem); @@ -287,8 +289,8 @@ int main(int argc, char *argv[]) { mgridLsgVec->putScalar(0.0); { fancyout << "========================================================\nUse multigrid hierarchy as solver." << std::endl; - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve"))); - mgridLsgVec->update(1.0,*xX,1.0); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve"))); + mgridLsgVec->update(1.0, *xX, 1.0); H->IsPreconditioner(false); H->Iterate(*xB, *mgridLsgVec, mgridSweeps); comm->barrier(); @@ -299,83 +301,82 @@ int main(int argc, char *argv[]) { fancyout << "========================================================\nExport results.\n========================================================" << std::endl; std::ofstream myfile; - std::stringstream ss; ss << "example" << MyPID << ".txt"; - myfile.open (ss.str().c_str()); + std::stringstream ss; + ss << "example" << MyPID << ".txt"; + myfile.open(ss.str().c_str()); ////////////////// // loop over all procs - for (int iproc=0; iproc < NumProc; iproc++) { - if (MyPID==iproc) { - int NumVectors1 = 2; - int NumMyElements1 = epCoord->Map(). 
NumMyElements(); - int MaxElementSize1 = epCoord->Map().MaxElementSize(); - int * FirstPointInElementList1 = NULL; - if (MaxElementSize1!=1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList(); - double ** A_Pointers = epCoord->Pointers(); - - if (MyPID==0) { + for (int iproc = 0; iproc < NumProc; iproc++) { + if (MyPID == iproc) { + int NumVectors1 = 2; + int NumMyElements1 = epCoord->Map().NumMyElements(); + int MaxElementSize1 = epCoord->Map().MaxElementSize(); + int* FirstPointInElementList1 = NULL; + if (MaxElementSize1 != 1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList(); + double** A_Pointers = epCoord->Pointers(); + + if (MyPID == 0) { myfile.width(8); - myfile << "# MyPID"; myfile << " "; + myfile << "# MyPID"; + myfile << " "; myfile.width(12); - if (MaxElementSize1==1) - myfile << "GID "; + if (MaxElementSize1 == 1) + myfile << "GID "; else - myfile << " GID/Point"; - for (int j = 0; j < NumVectors1 ; j++) - { + myfile << " GID/Point"; + for (int j = 0; j < NumVectors1; j++) { myfile.width(20); - myfile << "Value "; + myfile << "Value "; } myfile << std::endl; } - for (int i=0; i < NumMyElements1; i++) { - for (int ii=0; ii< epCoord->Map().ElementSize(i); ii++) { + for (int i = 0; i < NumMyElements1; i++) { + for (int ii = 0; ii < epCoord->Map().ElementSize(i); ii++) { int iii; myfile.width(10); - myfile << MyPID; myfile << " "; + myfile << MyPID; + myfile << " "; myfile.width(10); - if (MaxElementSize1==1) { - if(epCoord->Map().GlobalIndicesInt()) - { - int * MyGlobalElements1 = epCoord->Map().MyGlobalElements(); + if (MaxElementSize1 == 1) { + if (epCoord->Map().GlobalIndicesInt()) { + int* MyGlobalElements1 = epCoord->Map().MyGlobalElements(); myfile << MyGlobalElements1[i] << " "; } iii = i; - } - else { - if(epCoord->Map().GlobalIndicesInt()) - { - - int * MyGlobalElements1 = epCoord->Map().MyGlobalElements(); - myfile << MyGlobalElements1[i]<< "/" << ii << " "; + } else { + if (epCoord->Map().GlobalIndicesInt()) 
{ + int* MyGlobalElements1 = epCoord->Map().MyGlobalElements(); + myfile << MyGlobalElements1[i] << "/" << ii << " "; } - iii = FirstPointInElementList1[i]+ii; + iii = FirstPointInElementList1[i] + ii; } - for (int j = 0; j < NumVectors1 ; j++) - { + for (int j = 0; j < NumVectors1; j++) { myfile.width(20); - myfile << A_Pointers[j][iii]; + myfile << A_Pointers[j][iii]; } - myfile.precision(18); // set high precision for output + myfile.precision(18); // set high precision for output // add solution vector entry - myfile.width(25); myfile << (*exactLsgVec)[iii]; + myfile.width(25); + myfile << (*exactLsgVec)[iii]; // add preconditioned solution vector entry - myfile.width(25); myfile << (*precLsgVec)[iii]; + myfile.width(25); + myfile << (*precLsgVec)[iii]; Teuchos::ArrayRCP mgridLsgVecData = mgridLsgVec->getDataNonConst(0); - myfile.width(25); myfile << mgridLsgVecData[iii]; - + myfile.width(25); + myfile << mgridLsgVecData[iii]; - myfile.precision(6); // set default precision + myfile.precision(6); // set default precision myfile << std::endl; } - } // end loop over all lines on current proc + } // end loop over all lines on current proc myfile << std::flush; // syncronize procs @@ -383,14 +384,14 @@ int main(int argc, char *argv[]) { comm->barrier(); comm->barrier(); - } // end myProc + } // end myProc } //////////// myfile.close(); comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings == "yes") { @@ -401,8 +402,8 @@ int main(int argc, char *argv[]) { } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); #else return EXIT_SUCCESS; -#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) -} //main +#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) +} // main diff --git a/packages/muelu/doc/Tutorial/src/recirc2d_api.cpp b/packages/muelu/doc/Tutorial/src/recirc2d_api.cpp index 5379a352b627..26c5a9e0287d 100644 --- a/packages/muelu/doc/Tutorial/src/recirc2d_api.cpp +++ b/packages/muelu/doc/Tutorial/src/recirc2d_api.cpp @@ -84,14 +84,14 @@ typedef double Scalar; typedef int LocalOrdinal; typedef int GlobalOrdinal; -typedef Xpetra::EpetraNode Node; // Epetra needs SerialNode +typedef Xpetra::EpetraNode Node; // Epetra needs SerialNode #endif -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { #if defined(HAVE_MUELU_EPETRA) #include - using Teuchos::RCP; // reference count pointers + using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; @@ -102,20 +102,20 @@ int main(int argc, char *argv[]) { bool success = false; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int MyPID = comm->getRank(); - int NumProc = comm->getSize(); + RCP > comm = Teuchos::DefaultComm::getComm(); + int MyPID = comm->getRank(); + int NumProc = comm->getSize(); const Teuchos::RCP epComm = Teuchos::rcp_const_cast(Xpetra::toEpetra(comm)); // ========================================================================= // Convenient definitions // ========================================================================= - //SC zero = Teuchos::ScalarTraits::zero(), one = Teuchos::ScalarTraits::one(); + // SC zero = Teuchos::ScalarTraits::zero(), one = Teuchos::ScalarTraits::one(); // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& fancyout = *fancy; + Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); // 
========================================================================= @@ -124,20 +124,23 @@ int main(int argc, char *argv[]) { Teuchos::CommandLineProcessor clp(false); GO nx = 100, ny = 100; GO maxCoarseSize = 10; - LO maxLevels = 4; - clp.setOption("nx", &nx, "mesh size in x direction"); - clp.setOption("ny", &ny, "mesh size in y direction"); - clp.setOption("maxCoarseSize", &maxCoarseSize, "maximum coarse size"); - clp.setOption("maxLevels", &maxLevels, "maximum number of multigrid levels"); - int mgridSweeps = 1; clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver."); - std::string printTimings = "no"; clp.setOption("timings", &printTimings, "print timings to screen [yes/no]"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + LO maxLevels = 4; + clp.setOption("nx", &nx, "mesh size in x direction"); + clp.setOption("ny", &ny, "mesh size in y direction"); + clp.setOption("maxCoarseSize", &maxCoarseSize, "maximum coarse size"); + clp.setOption("maxLevels", &maxLevels, "maximum number of multigrid levels"); + int mgridSweeps = 1; + clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver."); + std::string printTimings = "no"; + clp.setOption("timings", &printTimings, "print timings to screen [yes/no]"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // 
========================================================================= @@ -153,8 +156,8 @@ int main(int argc, char *argv[]) { GaleriList.set("ny", ny); GaleriList.set("mx", epComm->NumProc()); GaleriList.set("my", 1); - GaleriList.set("lx", 1.0); // length of x-axis - GaleriList.set("ly", 1.0); // length of y-axis + GaleriList.set("lx", 1.0); // length of x-axis + GaleriList.set("ly", 1.0); // length of y-axis GaleriList.set("diff", 1e-5); GaleriList.set("conv", 1.0); @@ -168,11 +171,11 @@ int main(int argc, char *argv[]) { Teuchos::RCP epA = Teuchos::rcp(Galeri::CreateCrsMatrix("Recirc2D", epMap.get(), GaleriList)); // Epetra -> Xpetra - Teuchos::RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); + Teuchos::RCP exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT(epA)); Teuchos::RCP exAWrap = Teuchos::rcp(new CrsMatrixWrap(exA)); RCP A = Teuchos::rcp_dynamic_cast(exAWrap); - int numPDEs = 1; + int numPDEs = 1; A->SetFixedBlockSize(numPDEs); // set rhs and solution vector @@ -182,21 +185,21 @@ int main(int argc, char *argv[]) { X->PutScalar(0.0); // Epetra -> Xpetra - RCP xB = Teuchos::rcp(new Xpetra::EpetraVectorT(B)); - RCP xX = Teuchos::rcp(new Xpetra::EpetraVectorT(X)); + RCP xB = Teuchos::rcp(new Xpetra::EpetraVectorT(B)); + RCP xX = Teuchos::rcp(new Xpetra::EpetraVectorT(X)); xX->setSeed(100); xX->randomize(); // build null space vector - RCP map = A->getRowMap(); + RCP map = A->getRowMap(); RCP nullspace = MultiVectorFactory::Build(map, numPDEs); - for (int i=0; i nsValues = nullspace->getDataNonConst(i); - int numBlocks = nsValues.size() / numPDEs; - for (int j=0; j< numBlocks; ++j) { - nsValues[j*numPDEs + i] = 1.0; + int numBlocks = nsValues.size() / numPDEs; + for (int j = 0; j < numBlocks; ++j) { + nsValues[j * numPDEs + i] = 1.0; } } @@ -211,19 +214,19 @@ int main(int argc, char *argv[]) { comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1.5 - MueLu read XML"))); - RCP H = rcp ( new Hierarchy() ); + RCP H = 
rcp(new Hierarchy()); H->setDefaultVerbLevel(Teuchos::VERB_HIGH); H->SetMaxCoarseSize(maxCoarseSize); // build finest Level RCP Finest = H->GetLevel(); Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH); - Finest->Set("A",A); - Finest->Set("Nullspace",nullspace); + Finest->Set("A", A); + Finest->Set("Nullspace", nullspace); // create factories for transfer operators RCP PFact = Teuchos::rcp(new TentativePFactory()); - RCP RFact = Teuchos::rcp(new TransPFactory()); + RCP RFact = Teuchos::rcp(new TransPFactory()); RFact->SetFactory("P", PFact); // build level smoothers @@ -231,15 +234,15 @@ int main(int argc, char *argv[]) { RCP smooProto; std::string ifpackType; Teuchos::ParameterList ifpackList; - ifpackList.set("relaxation: sweeps", (LO) 1); - ifpackList.set("relaxation: damping factor", (SC) 1.0); + ifpackList.set("relaxation: sweeps", (LO)1); + ifpackList.set("relaxation: damping factor", (SC)1.0); ifpackType = "RELAXATION"; ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel"); - smooProto = Teuchos::rcp( new TrilinosSmoother(ifpackType, ifpackList) ); + smooProto = Teuchos::rcp(new TrilinosSmoother(ifpackType, ifpackList)); RCP SmooFact; if (maxLevels > 1) - SmooFact = rcp( new SmootherFactory(smooProto) ); + SmooFact = rcp(new SmootherFactory(smooProto)); // design multigrid hierarchy FactoryManager M; @@ -266,7 +269,7 @@ int main(int argc, char *argv[]) { fancyout << "========================================================\nCalculate exact solution." 
<< std::endl; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - direct solve"))); exactLsgVec->PutScalar(0.0); - exactLsgVec->Update(1.0,*X,1.0); + exactLsgVec->Update(1.0, *X, 1.0); Epetra_LinearProblem epetraProblem(epA.get(), exactLsgVec.get(), B.get()); Amesos amesosFactory; @@ -289,7 +292,7 @@ int main(int argc, char *argv[]) { tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - AMG as preconditioner"))); precLsgVec->PutScalar(0.0); - precLsgVec->Update(1.0,*X,1.0); + precLsgVec->Update(1.0, *X, 1.0); Epetra_LinearProblem epetraProblem(epA.get(), precLsgVec.get(), B.get()); AztecOO aztecSolver(epetraProblem); @@ -299,7 +302,7 @@ int main(int argc, char *argv[]) { aztecSolver.SetPrecOperator(&aztecPrec); int maxIts = 100; - //double tol2 = 1e-8; + // double tol2 = 1e-8; aztecSolver.Iterate(maxIts, tol); @@ -314,8 +317,8 @@ int main(int argc, char *argv[]) { mgridLsgVec->putScalar(0.0); { fancyout << "========================================================\nUse multigrid hierarchy as solver." 
<< std::endl; - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve"))); - mgridLsgVec->update(1.0,*xX,1.0); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve"))); + mgridLsgVec->update(1.0, *xX, 1.0); H->IsPreconditioner(false); H->Iterate(*xB, *mgridLsgVec, mgridSweeps); comm->barrier(); @@ -326,83 +329,82 @@ int main(int argc, char *argv[]) { fancyout << "========================================================\nExport results.\n========================================================" << std::endl; std::ofstream myfile; - std::stringstream ss; ss << "example" << MyPID << ".txt"; - myfile.open (ss.str().c_str()); + std::stringstream ss; + ss << "example" << MyPID << ".txt"; + myfile.open(ss.str().c_str()); ////////////////// // loop over all procs - for (int iproc=0; iproc < NumProc; iproc++) { - if (MyPID==iproc) { - int NumVectors1 = 2; - int NumMyElements1 = epCoord->Map(). NumMyElements(); - int MaxElementSize1 = epCoord->Map().MaxElementSize(); - int * FirstPointInElementList1 = NULL; - if (MaxElementSize1!=1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList(); - double ** A_Pointers = epCoord->Pointers(); - - if (MyPID==0) { + for (int iproc = 0; iproc < NumProc; iproc++) { + if (MyPID == iproc) { + int NumVectors1 = 2; + int NumMyElements1 = epCoord->Map().NumMyElements(); + int MaxElementSize1 = epCoord->Map().MaxElementSize(); + int* FirstPointInElementList1 = NULL; + if (MaxElementSize1 != 1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList(); + double** A_Pointers = epCoord->Pointers(); + + if (MyPID == 0) { myfile.width(8); - myfile << "# MyPID"; myfile << " "; + myfile << "# MyPID"; + myfile << " "; myfile.width(12); - if (MaxElementSize1==1) - myfile << "GID "; + if (MaxElementSize1 == 1) + myfile << "GID "; else - myfile << " GID/Point"; - for (int j = 0; j < NumVectors1 ; j++) - { + myfile << " GID/Point"; + for (int j = 0; j < NumVectors1; 
j++) { myfile.width(20); - myfile << "Value "; + myfile << "Value "; } myfile << std::endl; } - for (int i=0; i < NumMyElements1; i++) { - for (int ii=0; ii< epCoord->Map().ElementSize(i); ii++) { + for (int i = 0; i < NumMyElements1; i++) { + for (int ii = 0; ii < epCoord->Map().ElementSize(i); ii++) { int iii; myfile.width(10); - myfile << MyPID; myfile << " "; + myfile << MyPID; + myfile << " "; myfile.width(10); - if (MaxElementSize1==1) { - if(epCoord->Map().GlobalIndicesInt()) - { - int * MyGlobalElements1 = epCoord->Map().MyGlobalElements(); + if (MaxElementSize1 == 1) { + if (epCoord->Map().GlobalIndicesInt()) { + int* MyGlobalElements1 = epCoord->Map().MyGlobalElements(); myfile << MyGlobalElements1[i] << " "; } iii = i; - } - else { - if(epCoord->Map().GlobalIndicesInt()) - { - - int * MyGlobalElements1 = epCoord->Map().MyGlobalElements(); - myfile << MyGlobalElements1[i]<< "/" << ii << " "; + } else { + if (epCoord->Map().GlobalIndicesInt()) { + int* MyGlobalElements1 = epCoord->Map().MyGlobalElements(); + myfile << MyGlobalElements1[i] << "/" << ii << " "; } - iii = FirstPointInElementList1[i]+ii; + iii = FirstPointInElementList1[i] + ii; } - for (int j = 0; j < NumVectors1 ; j++) - { + for (int j = 0; j < NumVectors1; j++) { myfile.width(20); - myfile << A_Pointers[j][iii]; + myfile << A_Pointers[j][iii]; } - myfile.precision(18); // set high precision for output + myfile.precision(18); // set high precision for output // add solution vector entry - myfile.width(25); myfile << (*exactLsgVec)[iii]; + myfile.width(25); + myfile << (*exactLsgVec)[iii]; // add preconditioned solution vector entry - myfile.width(25); myfile << (*precLsgVec)[iii]; + myfile.width(25); + myfile << (*precLsgVec)[iii]; Teuchos::ArrayRCP mgridLsgVecData = mgridLsgVec->getDataNonConst(0); - myfile.width(25); myfile << mgridLsgVecData[iii]; - + myfile.width(25); + myfile << mgridLsgVecData[iii]; - myfile.precision(6); // set default precision + myfile.precision(6); // set default 
precision myfile << std::endl; } - } // end loop over all lines on current proc + } // end loop over all lines on current proc myfile << std::flush; // syncronize procs @@ -410,14 +412,14 @@ int main(int argc, char *argv[]) { comm->barrier(); comm->barrier(); - } // end myProc + } // end myProc } //////////// myfile.close(); comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings == "yes") { @@ -428,9 +430,8 @@ int main(int argc, char *argv[]) { } TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? EXIT_SUCCESS : EXIT_FAILURE); #else return EXIT_SUCCESS; -#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) -} //main - +#endif // #if defined(HAVE_MUELU_EPETRA) and defined(HAVE_MUELU_SERIAL) +} // main diff --git a/packages/muelu/doc/UsersGuide/update_params.sh b/packages/muelu/doc/UsersGuide/update_params.sh index dfca374e23af..21a647662e6d 100755 --- a/packages/muelu/doc/UsersGuide/update_params.sh +++ b/packages/muelu/doc/UsersGuide/update_params.sh @@ -63,6 +63,8 @@ echo '// @HEADER // *********************************************************************** // // @HEADER + +// clang-format off #include #include "MueLu_Exceptions.hpp" diff --git a/packages/muelu/example/ParameterList/MLParameterList.cpp b/packages/muelu/example/ParameterList/MLParameterList.cpp index 3e39d0eecd9b..7a373ef9a616 100644 --- a/packages/muelu/example/ParameterList/MLParameterList.cpp +++ b/packages/muelu/example/ParameterList/MLParameterList.cpp @@ -47,9 +47,9 @@ #include -#include // For Epetra only runs this points to FakeKokkos in Xpetra +#include // For Epetra only runs this points to FakeKokkos in Xpetra -#include // getParametersFromXmlFile() +#include // getParametersFromXmlFile() #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) #include @@ -80,7 +80,7 @@ // Default problem is Laplace1D with nx = 8748. 
Use --help to list available options. -template +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { #include @@ -94,34 +94,38 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool success = false; bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // // Parameters // - //TODO: FIXME: option by default does not work for MueLu/Tpetra + // TODO: FIXME: option by default does not work for MueLu/Tpetra int nIts = 9; - Galeri::Xpetra::Parameters matrixParameters(clp, 256); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra + Galeri::Xpetra::Parameters matrixParameters(clp, 256); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - std::string xmlFileName; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default an hard-coded parameter list."); - int translatedmuelu = true; clp.setOption("muelu2", &translatedmuelu, "use muelu through XML parameter translation and ParameterListInterpreter"); - int ml = true; + std::string xmlFileName; + clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default an hard-coded parameter list."); + int translatedmuelu = true; + clp.setOption("muelu2", &translatedmuelu, "use muelu through XML parameter translation and ParameterListInterpreter"); + int ml = true; #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - clp.setOption("ml", &ml, "use ml"); + clp.setOption("ml", &ml, "use ml"); #endif - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - if (comm->getRank() == 0) { std::cout << xpetraParameters << matrixParameters; } + if (comm->getRank() == 0) { + std::cout << xpetraParameters << matrixParameters; + } // choose ML and Tpetra if (ml && xpetraParameters.GetLib() == Xpetra::UseTpetra) { @@ -134,9 +138,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // RCP map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm); - RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); - RCP A = Pr->BuildMatrix(); + RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); + RCP A = Pr->BuildMatrix(); // // Preconditioner configuration @@ -145,31 +149,28 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // ML parameter list RCP params; if (xmlFileName != "") { - std::cout << "Reading " << xmlFileName << " ..." 
<< std::endl; params = Teuchos::getParametersFromXmlFile(xmlFileName); } else { - std::cout << "Using hard-coded parameter list:" << std::endl; params = rcp(new Teuchos::ParameterList()); - params->set("ML output", 10); + params->set("ML output", 10); params->set("max levels", 2); params->set("smoother: type", "symmetric Gauss-Seidel"); - if (xpetraParameters.GetLib() == Xpetra::UseTpetra) // TODO: remove 'if' when Amesos2-KLU becomes available - params->set("coarse: type","Amesos-Superlu"); + if (xpetraParameters.GetLib() == Xpetra::UseTpetra) // TODO: remove 'if' when Amesos2-KLU becomes available + params->set("coarse: type", "Amesos-Superlu"); else - params->set("coarse: type","Amesos-KLU"); - + params->set("coarse: type", "Amesos-KLU"); } std::cout << "Initial parameter list" << std::endl; std::cout << *params << std::endl; - if ( translatedmuelu ) { - // + if (translatedmuelu) { + // // Construct a multigrid preconditioner // @@ -186,19 +187,19 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // build default null space LocalOrdinal numPDEs = 1; - if(A->IsView("stridedMaps")==true) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) - numPDEs = Teuchos::rcp_dynamic_cast(A->getRowMap())->getFixedBlockSize(); - oldView = A->SwitchToView(oldView); + if (A->IsView("stridedMaps") == true) { + Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) 
+ numPDEs = Teuchos::rcp_dynamic_cast(A->getRowMap())->getFixedBlockSize(); + oldView = A->SwitchToView(oldView); } RCP nullspace = MultiVectorFactory::Build(A->getDomainMap(), numPDEs); - for (int i=0; i nsValues = nullspace->getDataNonConst(i); - int numBlocks = nsValues.size() / numPDEs; - for (int j=0; j< numBlocks; ++j) { - nsValues[j*numPDEs + i] = 1.0; + int numBlocks = nsValues.size() / numPDEs; + for (int j = 0; j < numBlocks; ++j) { + nsValues[j * numPDEs + i] = 1.0; } } @@ -217,8 +218,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP X = VectorFactory::Build(map); RCP B = VectorFactory::Build(map); - X->putScalar((Scalar) 0.0); - B->setSeed(846930886); B->randomize(); + X->putScalar((Scalar)0.0); + B->setSeed(846930886); + B->randomize(); // AMG as a standalone solver H->IsPreconditioner(false); @@ -237,31 +239,32 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // probably a fix necessary in EpetraOperator (which only supports // SERIAL or OPENMP, but not PTHREAD of course). 
- if (xpetraParameters.GetLib() == Xpetra::UseEpetra) { //TODO: should be doable with Tpetra too + if (xpetraParameters.GetLib() == Xpetra::UseEpetra) { // TODO: should be doable with Tpetra too // AMG as a preconditioner - //TODO: name mueluPrec and mlPrec not + // TODO: name mueluPrec and mlPrec not H->IsPreconditioner(true); - MueLu::EpetraOperator mueluPrec(H); // Wrap MueLu preconditioner into an Epetra Operator + MueLu::EpetraOperator mueluPrec(H); // Wrap MueLu preconditioner into an Epetra Operator // // Solve Ax = b // - RCP eA; //duplicate code - { // TODO: simplify this - RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); - RCP xCrsMtx = xCrsOp->getCrsMatrix(); - RCP eCrsMtx = Teuchos::rcp_dynamic_cast(xCrsMtx, true); - eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); + RCP eA; // duplicate code + { // TODO: simplify this + RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); + RCP xCrsMtx = xCrsOp->getCrsMatrix(); + RCP eCrsMtx = Teuchos::rcp_dynamic_cast(xCrsMtx, true); + eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); } RCP eX = rcp(new Epetra_Vector(eA->RowMap())); RCP eB = rcp(new Epetra_Vector(eA->RowMap())); - eX->PutScalar((Scalar) 0.0); - eB->SetSeed(846930886); eB->Random(); + eX->PutScalar((Scalar)0.0); + eB->SetSeed(846930886); + eB->Random(); Epetra_LinearProblem eProblem(eA.get(), eX.get(), eB.get()); @@ -273,9 +276,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg solver.Iterate(nIts, 1e-10); - { //TODO: simplify this - RCP mueluX = rcp(new Xpetra::EpetraVectorT(eX)); - RCP mueluB = rcp(new Xpetra::EpetraVectorT(eB)); + { // TODO: simplify this + RCP mueluX = rcp(new Xpetra::EpetraVectorT(eX)); + RCP mueluB = rcp(new Xpetra::EpetraVectorT(eB)); // Print relative residual norm typename Teuchos::ScalarTraits::magnitudeType residualNorms2 = Utilities::ResidualNorm(*A, *mueluX, *mueluB)[0]; if (comm->getRank() == 0) @@ -284,30 +287,36 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // 
TODO: AMG as a preconditioner (AZ_cg) } -#endif // HAVE_MUELU_AZTECOO - } // if (translatedmuelu) +#endif // HAVE_MUELU_AZTECOO + } // if (translatedmuelu) #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_AZTECOO) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP) && !defined(HAVE_MUELU_SYCL) if (ml) { - - std::cout << std::endl << std::endl << std::endl << std::endl << "**** ML ml ML ml ML" << std::endl << std::endl << std::endl << std::endl; + std::cout << std::endl + << std::endl + << std::endl + << std::endl + << "**** ML ml ML ml ML" << std::endl + << std::endl + << std::endl + << std::endl; // // Construct a multigrid preconditioner // // Multigrid Hierarchy - RCP crsOp = Teuchos::rcp_dynamic_cast(A, true); - RCP crsMtx = crsOp->getCrsMatrix(); - RCP epetraCrsMtx = Teuchos::rcp_dynamic_cast(crsMtx, true); + RCP crsOp = Teuchos::rcp_dynamic_cast(A, true); + RCP crsMtx = crsOp->getCrsMatrix(); + RCP epetraCrsMtx = Teuchos::rcp_dynamic_cast(crsMtx, true); RCP epetra_CrsMtx = epetraCrsMtx->getEpetra_CrsMatrix(); RCP eA; - { // TODO: simplify this - RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); - RCP xCrsMtx = xCrsOp->getCrsMatrix(); - RCP eCrsMtx = Teuchos::rcp_dynamic_cast(xCrsMtx, true); - eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); + { // TODO: simplify this + RCP xCrsOp = Teuchos::rcp_dynamic_cast(A, true); + RCP xCrsMtx = xCrsOp->getCrsMatrix(); + RCP eCrsMtx = Teuchos::rcp_dynamic_cast(xCrsMtx, true); + eA = eCrsMtx->getEpetra_CrsMatrixNonConst(); } RCP mlPrec = rcp(new ML_Epetra::MultiLevelPreconditioner(*eA, *params)); @@ -319,8 +328,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP eX = rcp(new Epetra_Vector(eA->RowMap())); RCP eB = rcp(new Epetra_Vector(eA->RowMap())); - eX->PutScalar((Scalar) 0.0); - eB->SetSeed(846930886); eB->Random(); + eX->PutScalar((Scalar)0.0); + eB->SetSeed(846930886); + eB->Random(); Epetra_LinearProblem eProblem(eA.get(), eX.get(), eB.get()); @@ 
-332,9 +342,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg solver.Iterate(nIts, 1e-10); - { //TODO: simplify this - RCP mueluX = rcp(new Xpetra::EpetraVectorT(eX)); - RCP mueluB = rcp(new Xpetra::EpetraVectorT(eB)); + { // TODO: simplify this + RCP mueluX = rcp(new Xpetra::EpetraVectorT(eX)); + RCP mueluB = rcp(new Xpetra::EpetraVectorT(eB)); // Print relative residual norm typename Teuchos::ScalarTraits::magnitudeType residualNorms = Utilities::ResidualNorm(*A, *mueluX, *mueluB)[0]; if (comm->getRank() == 0) @@ -342,28 +352,26 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } std::cout << "Parameter list after ML run" << std::endl; - const Teuchos::ParameterList & paramsAfterML = mlPrec->GetList(); + const Teuchos::ParameterList ¶msAfterML = mlPrec->GetList(); std::cout << paramsAfterML << std::endl; - } // if (ml) - -#endif // HAVE_MUELU_ML && HAVE_MUELU_EPETRA + } // if (ml) +#endif // HAVE_MUELU_ML && HAVE_MUELU_EPETRA success = true; } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } - -int main(int argc, char* argv[]) { +int main(int argc, char *argv[]) { bool success = false; bool verbose = true; - Teuchos::GlobalMPISession mpiSession(&argc,&argv); - Kokkos::initialize(argc,argv); + Teuchos::GlobalMPISession mpiSession(&argc, &argv); + Kokkos::initialize(argc, argv); try { const bool throwExceptions = false; const bool recogniseAllOptions = false; @@ -371,20 +379,21 @@ int main(int argc, char* argv[]) { Teuchos::CommandLineProcessor clp(throwExceptions, recogniseAllOptions); Xpetra::Parameters xpetraParameters(clp); - std::string node = ""; clp.setOption("node", &node, "node type (serial | openmp | cuda | hip)"); + std::string node = ""; + clp.setOption("node", &node, "node type (serial | openmp | cuda | hip)"); switch (clp.parse(argc, argv, NULL)) { - case Teuchos::CommandLineProcessor::PARSE_ERROR: return EXIT_FAILURE; + case Teuchos::CommandLineProcessor::PARSE_ERROR: return EXIT_FAILURE; case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); if (lib == Xpetra::UseEpetra) { #ifdef HAVE_MUELU_EPETRA - return main_(clp, lib, argc, argv); + return main_(clp, lib, argc, argv); #else throw MueLu::Exceptions::RuntimeError("Epetra is not available"); #endif @@ -396,5 +405,5 @@ int main(int argc, char* argv[]) { TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); Kokkos::finalize(); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp b/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp index 55487c018a5a..157177172ed5 100644 --- a/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp +++ b/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp @@ -79,7 +79,6 @@ using Teuchos::RCP; using Teuchos::rcp; - //---------------------------------------------------------------------------------------------------------- // // This example demonstrates how to use MueLu in a fashion that looks like ML's LevelWrap @@ -94,158 +93,155 @@ const std::string thinSeparator = "-------------------------------------------- const std::string prefSeparator = "====================================="; - namespace MueLuExamples { - template - typename Teuchos::ScalarTraits::magnitudeType diff_vectors( - const Xpetra::Vector & X, - const Xpetra::Vector & Y) { - RCP > diff = Xpetra::VectorFactory::Build(X.getMap()); - diff->update(1.0,X,-1.0,Y,0.0); - typedef typename Teuchos::ScalarTraits::magnitudeType mgn; - Teuchos::Array mt(1); - diff->norm2(mt); - return mt[0]; - } +template +typename Teuchos::ScalarTraits::magnitudeType diff_vectors( + const Xpetra::Vector &X, + const Xpetra::Vector &Y) { + RCP > diff = Xpetra::VectorFactory::Build(X.getMap()); + diff->update(1.0, X, -1.0, Y, 0.0); + typedef typename Teuchos::ScalarTraits::magnitudeType mgn; + Teuchos::Array mt(1); + diff->norm2(mt); + return mt[0]; +} - template - typename Teuchos::ScalarTraits::magnitudeType compute_resid_norm( - const Xpetra::Matrix & A, - const Xpetra::Vector & X, - const Xpetra::Vector & B) { - RCP > temp = Xpetra::VectorFactory::Build(X.getMap()); - A.apply(X,*temp); - temp->update(1.0,B,-1.0); - typedef typename Teuchos::ScalarTraits::magnitudeType mgn; - Teuchos::Array mt(1); - temp->norm2(mt); - return mt[0]; - } +template +typename Teuchos::ScalarTraits::magnitudeType compute_resid_norm( + const Xpetra::Matrix &A, + const Xpetra::Vector &X, + 
const Xpetra::Vector &B) { + RCP > temp = Xpetra::VectorFactory::Build(X.getMap()); + A.apply(X, *temp); + temp->update(1.0, B, -1.0); + typedef typename Teuchos::ScalarTraits::magnitudeType mgn; + Teuchos::Array mt(1); + temp->norm2(mt); + return mt[0]; +} // -------------------------------------------------------------------------------------- - template - void solve_system_belos( - RCP > & A, - RCP > & X, - RCP > & B, - Teuchos::ParameterList & MueLuList, - const std::string & belos_solver, - RCP & SList) { - using Teuchos::RCP; - using Teuchos::rcp; - typedef Tpetra::Operator Tpetra_Operator; - typedef Tpetra::CrsMatrix Tpetra_CrsMatrix; - typedef Tpetra::BlockCrsMatrix Tpetra_BlockCrsMatrix; - typedef Xpetra::TpetraBlockCrsMatrix Xpetra_TpetraBlockCrsMatrix; - typedef Tpetra::Vector Tpetra_Vector; - typedef Tpetra::MultiVector Tpetra_MultiVector; - - RCP At = MueLu::Utilities::Op2NonConstTpetraRow(A); - RCP Mt = MueLu::CreateTpetraPreconditioner(At,MueLuList); - RCP Xt = Xpetra::toTpetra(*X); - RCP Bt = Xpetra::toTpetra(*B); - - if(Xt.is_null() || Bt.is_null() || At.is_null() || Mt.is_null()) throw std::runtime_error("ERROR: Xpetra to Tpetra conversion failed"); - - typedef Tpetra_MultiVector MV; - typedef Tpetra_Operator OP; - RCP > belosProblem = rcp(new Belos::LinearProblem(At, Xt, Bt)); - belosProblem->setLeftPrec(Mt); - belosProblem->setProblem(Xt,Bt); - - Belos::SolverFactory BelosFactory; - Teuchos::RCP > BelosSolver = BelosFactory.create(belos_solver, SList); - BelosSolver->setProblem(belosProblem); - BelosSolver->solve(); - } - - // -------------------------------------------------------------------------------------- - template - void solve_system_ifpack2( - RCP > & A, - RCP > & X, - RCP > & B, - const std::string & ifpack2_solver, - Teuchos::ParameterList & Ifpack2List) { - using Teuchos::RCP; - using Teuchos::rcp; - typedef Tpetra::Operator Tpetra_Operator; - typedef Ifpack2::Preconditioner Ifpack2_Preconditioner; - typedef Tpetra::CrsMatrix 
Tpetra_CrsMatrix; - typedef Tpetra::RowMatrix Tpetra_RowMatrix; - typedef Tpetra::BlockCrsMatrix Tpetra_BlockCrsMatrix; - typedef Xpetra::TpetraBlockCrsMatrix Xpetra_TpetraBlockCrsMatrix; - typedef Tpetra::Vector Tpetra_Vector; - typedef Tpetra::MultiVector Tpetra_MultiVector; - - RCP At = MueLu::Utilities::Op2NonConstTpetraRow(A); - RCP Xt = Xpetra::toTpetra(*X); - RCP Bt = Xpetra::toTpetra(*B); - - RCP Solver = Ifpack2::Factory::create(ifpack2_solver,At); - Solver->setParameters(Ifpack2List); - Solver->initialize(); - Solver->compute(); - - Solver->apply(*Bt,*Xt); - } +template +void solve_system_belos( + RCP > &A, + RCP > &X, + RCP > &B, + Teuchos::ParameterList &MueLuList, + const std::string &belos_solver, + RCP &SList) { + using Teuchos::RCP; + using Teuchos::rcp; + typedef Tpetra::Operator Tpetra_Operator; + typedef Tpetra::CrsMatrix Tpetra_CrsMatrix; + typedef Tpetra::BlockCrsMatrix Tpetra_BlockCrsMatrix; + typedef Xpetra::TpetraBlockCrsMatrix Xpetra_TpetraBlockCrsMatrix; + typedef Tpetra::Vector Tpetra_Vector; + typedef Tpetra::MultiVector Tpetra_MultiVector; + + RCP At = MueLu::Utilities::Op2NonConstTpetraRow(A); + RCP Mt = MueLu::CreateTpetraPreconditioner(At, MueLuList); + RCP Xt = Xpetra::toTpetra(*X); + RCP Bt = Xpetra::toTpetra(*B); + + if (Xt.is_null() || Bt.is_null() || At.is_null() || Mt.is_null()) throw std::runtime_error("ERROR: Xpetra to Tpetra conversion failed"); + + typedef Tpetra_MultiVector MV; + typedef Tpetra_Operator OP; + RCP > belosProblem = rcp(new Belos::LinearProblem(At, Xt, Bt)); + belosProblem->setLeftPrec(Mt); + belosProblem->setProblem(Xt, Bt); + + Belos::SolverFactory BelosFactory; + Teuchos::RCP > BelosSolver = BelosFactory.create(belos_solver, SList); + BelosSolver->setProblem(belosProblem); + BelosSolver->solve(); +} - // -------------------------------------------------------------------------------------- - // This routine generate's the user's original A matrix and nullspace - template - void 
generate_user_matrix_and_nullspace(std::string &matrixType, Xpetra::UnderlyingLib & lib,Teuchos::ParameterList &galeriList, RCP > &comm, RCP > & A, RCP > & nullspace){ - using Teuchos::RCP; +// -------------------------------------------------------------------------------------- +template +void solve_system_ifpack2( + RCP > &A, + RCP > &X, + RCP > &B, + const std::string &ifpack2_solver, + Teuchos::ParameterList &Ifpack2List) { + using Teuchos::RCP; + using Teuchos::rcp; + typedef Tpetra::Operator Tpetra_Operator; + typedef Ifpack2::Preconditioner Ifpack2_Preconditioner; + typedef Tpetra::CrsMatrix Tpetra_CrsMatrix; + typedef Tpetra::RowMatrix Tpetra_RowMatrix; + typedef Tpetra::BlockCrsMatrix Tpetra_BlockCrsMatrix; + typedef Xpetra::TpetraBlockCrsMatrix Xpetra_TpetraBlockCrsMatrix; + typedef Tpetra::Vector Tpetra_Vector; + typedef Tpetra::MultiVector Tpetra_MultiVector; + + RCP At = MueLu::Utilities::Op2NonConstTpetraRow(A); + RCP Xt = Xpetra::toTpetra(*X); + RCP Bt = Xpetra::toTpetra(*B); + + RCP Solver = Ifpack2::Factory::create(ifpack2_solver, At); + Solver->setParameters(Ifpack2List); + Solver->initialize(); + Solver->compute(); + + Solver->apply(*Bt, *Xt); +} - RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; - - typedef typename Xpetra::Map map_type; - typedef typename Xpetra::MultiVector multivector_type; - typedef typename Xpetra::MultiVector::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> realvaluedmultivector_type; - typedef typename Xpetra::CrsMatrixWrap matrixwrap_type; - RCP map; - RCP coordinates; - if (matrixType == "Laplace1D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); - - } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian2D", comm, galeriList); - 
coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); - - } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); - } +// -------------------------------------------------------------------------------------- +// This routine generate's the user's original A matrix and nullspace +template +void generate_user_matrix_and_nullspace(std::string &matrixType, Xpetra::UnderlyingLib &lib, Teuchos::ParameterList &galeriList, RCP > &comm, RCP > &A, RCP > &nullspace) { + using Teuchos::RCP; - // Expand map to do multiple DOF per node for block problems - if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") - map = Xpetra::MapFactory::Build(map, (matrixType == "Elasticity2D" ? 2 : 3)); + RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + Teuchos::FancyOStream &out = *fancy; + + typedef typename Xpetra::Map map_type; + typedef typename Xpetra::MultiVector multivector_type; + typedef typename Xpetra::MultiVector::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> realvaluedmultivector_type; + typedef typename Xpetra::CrsMatrixWrap matrixwrap_type; + RCP map; + RCP coordinates; + if (matrixType == "Laplace1D") { + map = Galeri::Xpetra::CreateMap(lib, "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); + + } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { + map = Galeri::Xpetra::CreateMap(lib, "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + + } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { + map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", 
comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + } - out << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl - << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl - << "Processor subdomains in z direction: " << galeriList.get("mz") << std::endl - << "========================================================" << std::endl; + // Expand map to do multiple DOF per node for block problems + if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") + map = Xpetra::MapFactory::Build(map, (matrixType == "Elasticity2D" ? 2 : 3)); - RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixType, map, galeriList); + out << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl + << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl + << "Processor subdomains in z direction: " << galeriList.get("mz") << std::endl + << "========================================================" << std::endl; - A = Pr->BuildMatrix(); + RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixType, map, galeriList); - if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { - nullspace = Pr->BuildNullspace(); - A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 2 : 3); - } + A = Pr->BuildMatrix(); + if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { + nullspace = Pr->BuildNullspace(); + A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 
2 : 3); } - } +} // namespace MueLuExamples // -------------------------------------------------------------------------------------- -//int main(int argc, char *argv[]) { -template +// int main(int argc, char *argv[]) { +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { #include using Teuchos::RCP; @@ -257,47 +253,49 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP > comm = Teuchos::DefaultComm::getComm(); RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream &out = *fancy; typedef Teuchos::ScalarTraits STS; // ========================================================================= // Parameters initialization // ========================================================================= - //Teuchos::CommandLineProcessor clp(false); + // Teuchos::CommandLineProcessor clp(false); GO nx = 100, ny = 100, nz = 100; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - std::string matFileName = ""; clp.setOption("matrix",&matFileName,"read matrix from a file"); - LO blocksize = 1; clp.setOption("blocksize",&blocksize,"block size"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + std::string matFileName = ""; + clp.setOption("matrix", &matFileName, "read matrix from a file"); + LO blocksize = 1; + clp.setOption("blocksize", &blocksize, "block size"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case 
Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - //Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); + // Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); Teuchos::ParameterList galeriList = galeriParameters.GetParameterList(); - if(lib!=Xpetra::UseTpetra) + if (lib != Xpetra::UseTpetra) throw std::runtime_error("This test only works with Tpetra linear algebra"); // ========================================================================= // Problem construction // ========================================================================= - RCP map; + RCP map; RCP A; RCP nullspace; - typedef Tpetra::CrsMatrix Tpetra_CrsMatrix; - typedef Tpetra::Operator Tpetra_Operator; - typedef Tpetra::BlockCrsMatrix Tpetra_BlockCrsMatrix; - typedef Xpetra::TpetraBlockCrsMatrix Xpetra_TpetraBlockCrsMatrix; - typedef Xpetra::CrsMatrix Xpetra_CrsMatrix; - typedef Xpetra::CrsMatrixWrap Xpetra_CrsMatrixWrap; + typedef Tpetra::CrsMatrix Tpetra_CrsMatrix; + typedef Tpetra::Operator Tpetra_Operator; + typedef Tpetra::BlockCrsMatrix Tpetra_BlockCrsMatrix; + typedef Xpetra::TpetraBlockCrsMatrix Xpetra_TpetraBlockCrsMatrix; + typedef Xpetra::CrsMatrix Xpetra_CrsMatrix; + typedef Xpetra::CrsMatrixWrap Xpetra_CrsMatrixWrap; typedef typename Teuchos::ScalarTraits::magnitudeType SCN; SC one = Teuchos::ScalarTraits::one(); @@ -305,94 +303,95 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP Acrs; RCP Ablock; - if(matFileName.length() > 0) { + if (matFileName.length() > 0) { // Read matrix from disk - out << thickSeparator << std::endl << "Reading matrix from disk" < reader_type; - Acrs = reader_type::readSparseFile(matFileName,comm); - } - else{ + Acrs = reader_type::readSparseFile(matFileName, comm); + } else { // Use Galeri - out << thickSeparator << std::endl << xpetraParameters << 
galeriParameters; + out << thickSeparator << std::endl + << xpetraParameters << galeriParameters; std::string matrixType = galeriParameters.GetMatrixType(); - RCP > Axp; - MueLuExamples::generate_user_matrix_and_nullspace(matrixType,lib,galeriList,comm,Axp,nullspace); - Acrs = Xpetra::Helpers::Op2NonConstTpetraCrs(Axp); + RCP > Axp; + MueLuExamples::generate_user_matrix_and_nullspace(matrixType, lib, galeriList, comm, Axp, nullspace); + Acrs = Xpetra::Helpers::Op2NonConstTpetraCrs(Axp); } // Block this bad boy - Ablock = Tpetra::convertToBlockCrsMatrix(*Acrs,blocksize); + Ablock = Tpetra::convertToBlockCrsMatrix(*Acrs, blocksize); // Now wrap BlockCrs to Xpetra::Matrix RCP Axt = rcp(new Xpetra_TpetraBlockCrsMatrix(Ablock)); - A = rcp(new Xpetra_CrsMatrixWrap(Axt)); + A = rcp(new Xpetra_CrsMatrixWrap(Axt)); // ========================================================================= // Setups and solves // ========================================================================= - map=Xpetra::toXpetra(Acrs->getRowMap()); + map = Xpetra::toXpetra(Acrs->getRowMap()); RCP X1 = VectorFactory::Build(map); RCP X2 = VectorFactory::Build(map); - RCP B = VectorFactory::Build(map); + RCP B = VectorFactory::Build(map); B->setSeed(846930886); B->randomize(); RCP tm; // Belos Options - RCP SList = rcp(new Teuchos::ParameterList ); - SList->set("Verbosity",Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - SList->set("Output Frequency",10); - SList->set("Output Style",Belos::Brief); - SList->set("Maximum Iterations",10); - SList->set("Convergence Tolerance",5e-2); + RCP SList = rcp(new Teuchos::ParameterList); + SList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + SList->set("Output Frequency", 10); + SList->set("Output Style", Belos::Brief); + SList->set("Maximum Iterations", 10); + SList->set("Convergence Tolerance", 5e-2); // ========================================================================= // Solve #1 (fixed point + 
Jacobi) // ========================================================================= out << thickSeparator << std::endl; - out << prefSeparator << " Solve 1: Fixed Point + Jacobi"<< prefSeparator <(A,X1,B,MueList,belos_solver,SList); + MueLuExamples::solve_system_belos(A, X1, B, MueList, belos_solver, SList); std::cout << "I" << std::endl; - SCN result = MueLuExamples::compute_resid_norm(*A,*X1,*B); - out<<"Solve #1: Residual Norm = "<(*A, *X1, *B); + out << "Solve #1: Residual Norm = " << result << std::endl; } // ========================================================================= // Solve #2 (striaght up Jacobi) // ========================================================================= out << thickSeparator << std::endl; - out << prefSeparator << " Solve 2: Fixed Jacobi"<< prefSeparator <(*A,*X2,*B); - out<<"Solve #2: Residual Norm = "<(*A, *X2, *B); + out << "Solve #2: Residual Norm = " << result << std::endl; } // Compare 1 & 2 - SCN norm = MueLuExamples::diff_vectors(*X1,*X2); - if(norm > 1e-10) { - out<<"ERROR: Norm of Solve #1 and Solve #2 differs by "<(*X1, *X2); + if (norm > 1e-10) { + out << "ERROR: Norm of Solve #1 and Solve #2 differs by " << norm << std::endl; + success = false; } - }//end try + } // end try TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } //- -- -------------------------------------------------------- @@ -400,7 +399,5 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } - - diff --git a/packages/muelu/example/advanced/clone/Clone-Tpetra.cpp b/packages/muelu/example/advanced/clone/Clone-Tpetra.cpp index 46e701a4ccd9..8334ea68e3a0 100644 --- a/packages/muelu/example/advanced/clone/Clone-Tpetra.cpp +++ b/packages/muelu/example/advanced/clone/Clone-Tpetra.cpp @@ -61,7 +61,7 @@ #include #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include "MueLu_SmootherFactory.hpp" #include "MueLu_TrilinosSmoother.hpp" #include "MueLu_Ifpack2Smoother.hpp" @@ -74,15 +74,15 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp #endif -int main(int argc, char *argv[]) { - #include - #include +int main(int argc, char* argv[]) { +#include +#include - using Teuchos::RCP; // reference count pointers + using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; @@ -90,7 +90,7 @@ int main(int argc, char *argv[]) { // MPI initialization using Teuchos // ========================================================================= Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // ========================================================================= // Convenient definitions @@ -99,38 +99,46 @@ int main(int argc, char *argv[]) { // Instead of checking each time for rank, create a rank 0 stream RCP fancy = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& fancyout = *fancy; + Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); - // ========================================================================= // Parameters initialization // ========================================================================= Teuchos::CommandLineProcessor clp(false); GO nx = 100, ny = 100, nz = 100; - Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = "scalingTest.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'scalingTest.xml'"); - int amgAsPrecond = 1; clp.setOption("precond", &amgAsPrecond, "apply multigrid as preconditioner"); - int amgAsSolver = 0; clp.setOption("fixPoint", &amgAsSolver, "apply multigrid as solver"); - bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); - int writeMatricesOPT = -2; clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - std::string krylovMethod = "cg"; clp.setOption("krylov", &krylovMethod, "outer Krylov method"); - std::string optSmooType = "cheby"; clp.setOption("smooType", &optSmooType, "smoother type ('l1-sgs', 'sgs 'or 'cheby')"); - int optSweeps = 2; clp.setOption("sweeps", &optSweeps, "sweeps to be used in SGS (or Chebyshev degree)"); - - - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = 
"scalingTest.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'scalingTest.xml'"); + int amgAsPrecond = 1; + clp.setOption("precond", &amgAsPrecond, "apply multigrid as preconditioner"); + int amgAsSolver = 0; + clp.setOption("fixPoint", &amgAsSolver, "apply multigrid as solver"); + bool printTimings = true; + clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); + int writeMatricesOPT = -2; + clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + std::string krylovMethod = "cg"; + clp.setOption("krylov", &krylovMethod, "outer Krylov method"); + std::string optSmooType = "cheby"; + clp.setOption("smooType", &optSmooType, "smoother type ('l1-sgs', 'sgs 'or 'cheby')"); + int optSweeps = 2; + clp.setOption("sweeps", &optSweeps, "sweeps to be used in SGS (or Chebyshev degree)"); + + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - fancyout << "========================================================\n" << xpetraParameters << matrixParameters; + fancyout << "========================================================\n" + << xpetraParameters << matrixParameters; // ========================================================================= // Problem construction @@ -140,7 +148,7 @@ int main(int argc, char *argv[]) { comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build"))); - RCP map; + RCP map; RCP coordinates; // Retrieve matrix parameters (they may have been changed on the command 
line), and pass them to Galeri. @@ -155,9 +163,9 @@ int main(int argc, char *argv[]) { // If you don't want Galeri to do this, specify mx or my on the galeriList. Teuchos::ParameterList pl = matrixParameters.GetParameterList(); Teuchos::ParameterList galeriList; - galeriList.set("nx", pl.get("nx",nx)); - galeriList.set("ny", pl.get("ny",ny)); - galeriList.set("nz", pl.get("nz",nz)); + galeriList.set("nx", pl.get("nx", nx)); + galeriList.set("ny", pl.get("ny", ny)); + galeriList.set("nz", pl.get("nz", nz)); // galeriList.set("mx", comm->getSize()); // galeriList.set("my", 1); @@ -165,31 +173,29 @@ int main(int argc, char *argv[]) { // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside if (matrixParameters.GetMatrixType() == "Laplace1D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D",map,matrixParameters.GetParameterList()); - } - else if (matrixParameters.GetMatrixType() == "Laplace2D" || matrixParameters.GetMatrixType() == "Star2D" || matrixParameters.GetMatrixType() == "Elasticity2D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D",map,matrixParameters.GetParameterList()); - } - else if (matrixParameters.GetMatrixType() == "Laplace3D" || matrixParameters.GetMatrixType() == "Elasticity3D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D",map,matrixParameters.GetParameterList()); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", 
map, matrixParameters.GetParameterList()); + } else if (matrixParameters.GetMatrixType() == "Laplace2D" || matrixParameters.GetMatrixType() == "Star2D" || matrixParameters.GetMatrixType() == "Elasticity2D") { + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, matrixParameters.GetParameterList()); + } else if (matrixParameters.GetMatrixType() == "Laplace3D" || matrixParameters.GetMatrixType() == "Elasticity3D") { + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, matrixParameters.GetParameterList()); } // Expand map to do multiple DOF per node for block problems if (matrixParameters.GetMatrixType() == "Elasticity2D") - map = Xpetra::MapFactory::Build(map, 2); + map = Xpetra::MapFactory::Build(map, 2); if (matrixParameters.GetMatrixType() == "Elasticity3D") - map = Xpetra::MapFactory::Build(map, 3); + map = Xpetra::MapFactory::Build(map, 3); if (comm->getRank() == 0) { GO mx = galeriList.get("mx", -1); GO my = galeriList.get("my", -1); GO mz = galeriList.get("mz", -1); fancyout << "Processor subdomains in x direction: " << mx << std::endl - << "Processor subdomains in y direction: " << my << std::endl - << "Processor subdomains in z direction: " << mz << std::endl - << "========================================================" << std::endl; + << "Processor subdomains in y direction: " << my << std::endl + << "Processor subdomains in z direction: " << mz << std::endl + << "========================================================" << std::endl; } Teuchos::ParameterList matrixParams = matrixParameters.GetParameterList(); @@ -198,18 +204,18 @@ int main(int argc, char *argv[]) { matrixParams.set("mz", galeriList.get("mz", -1)); if (matrixParameters.GetMatrixType() == "Elasticity2D" || matrixParameters.GetMatrixType() == 
"Elasticity3D") { // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet - matrixParams.set("right boundary" , "Neumann"); + matrixParams.set("right boundary", "Neumann"); matrixParams.set("bottom boundary", "Neumann"); - matrixParams.set("top boundary" , "Neumann"); - matrixParams.set("front boundary" , "Neumann"); - matrixParams.set("back boundary" , "Neumann"); + matrixParams.set("top boundary", "Neumann"); + matrixParams.set("front boundary", "Neumann"); + matrixParams.set("back boundary", "Neumann"); } - RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParams); + RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParams); RCP A = Pr->BuildMatrix(); - RCP nullspace = MultiVectorFactory::Build(map,1); + RCP nullspace = MultiVectorFactory::Build(map, 1); if (matrixParameters.GetMatrixType() == "Elasticity2D" || matrixParameters.GetMatrixType() == "Elasticity3D") { nullspace = Pr->BuildNullspace(); @@ -219,57 +225,52 @@ int main(int argc, char *argv[]) { nullspace->putScalar(one); } - fancyout << "Galeri complete.\n========================================================" << std::endl; // ========================================================================= // Preconditioner construction // ========================================================================= - //Multigrid Hierarchy + // Multigrid Hierarchy RCP H = rcp(new Hierarchy(A)); H->setDefaultVerbLevel(Teuchos::VERB_HIGH); FactoryManager M; - //Smoothers + // Smoothers std::string ifpackType; Teuchos::ParameterList ifpackList; - ifpackList.set("relaxation: sweeps", (LO) optSweeps); - ifpackList.set("relaxation: damping factor", (SC) 1.0); + ifpackList.set("relaxation: sweeps", (LO)optSweeps); + ifpackList.set("relaxation: damping factor", (SC)1.0); if (optSmooType == "sgs") { ifpackType = "RELAXATION"; ifpackList.set("relaxation: type", "Symmetric 
Gauss-Seidel"); - } - else if (optSmooType == "l1-sgs") { + } else if (optSmooType == "l1-sgs") { ifpackType = "RELAXATION"; ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel"); ifpackList.set("relaxation: use l1", true); } else if (optSmooType == "cheby") { ifpackType = "CHEBYSHEV"; - ifpackList.set("chebyshev: degree", (LO) optSweeps); + ifpackList.set("chebyshev: degree", (LO)optSweeps); if (matrixParameters.GetMatrixType() == "Laplace1D") { - ifpackList.set("chebyshev: ratio eigenvalue", (SC) 3); - } - else if (matrixParameters.GetMatrixType() == "Laplace2D") { - ifpackList.set("chebyshev: ratio eigenvalue", (SC) 7); - } - else if (matrixParameters.GetMatrixType() == "Laplace3D") { - ifpackList.set("chebyshev: ratio eigenvalue", (SC) 20); + ifpackList.set("chebyshev: ratio eigenvalue", (SC)3); + } else if (matrixParameters.GetMatrixType() == "Laplace2D") { + ifpackList.set("chebyshev: ratio eigenvalue", (SC)7); + } else if (matrixParameters.GetMatrixType() == "Laplace3D") { + ifpackList.set("chebyshev: ratio eigenvalue", (SC)20); } } RCP smootherPrototype = rcp(new Ifpack2Smoother(ifpackType, ifpackList)); M.SetFactory("Smoother", rcp(new SmootherFactory(smootherPrototype))); - // create coarsest smoother RCP coarsestSmooProto; Teuchos::ParameterList coarsestSmooList; - coarsestSmooProto = rcp( new Ifpack2Smoother("RILUK",coarsestSmooList) ); + coarsestSmooProto = rcp(new Ifpack2Smoother("RILUK", coarsestSmooList)); RCP coarsestSmooFact = rcp(new SmootherFactory(coarsestSmooProto, Teuchos::null)); M.SetFactory("CoarseSolver", coarsestSmooFact); - int startLevel = 0; + int startLevel = 0; int optMaxLevels = 10; H->Setup(M, startLevel, optMaxLevels); @@ -292,29 +293,27 @@ int main(int argc, char *argv[]) { Teuchos::Array norms(1); B->norm2(norms); - B->scale(1.0/norms[0]); + B->scale(1.0 / norms[0]); X->putScalar(zero); } if (amgAsSolver) { - H->IsPreconditioner(false); H->Iterate(*B, *X, 25); } else if (amgAsPrecond) { - #ifdef HAVE_MUELU_BELOS // 
Operator and Multivector type that will be used with Belos - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; H->IsPreconditioner(true); // Define Operator and Preconditioner - Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator - Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator + Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP< Belos::LinearProblem > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); belosProblem->setLeftPrec(belosPrec); bool set = belosProblem->setProblem(); @@ -326,14 +325,14 @@ int main(int argc, char *argv[]) { // Belos parameter list int maxIts = 2000; Teuchos::ParameterList belosList; - belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested - belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList.set("Output Frequency", 1); - belosList.set("Output Style", Belos::Brief); + belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested + belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", Belos::Brief); // Create an iterative solver manager - RCP< Belos::SolverManager > solver; + RCP > solver; if (krylovMethod == "cg") { solver = rcp(new Belos::BlockCGSolMgr(belosProblem, rcp(&belosList, 
false))); } else if (krylovMethod == "gmres") { @@ -350,41 +349,43 @@ int main(int argc, char *argv[]) { // Get the number of iterations for this solve. fancyout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; - } catch(...) { - fancyout << std::endl << "ERROR: Belos threw an error! " << std::endl; + } catch (...) { + fancyout << std::endl + << "ERROR: Belos threw an error! " << std::endl; } // Check convergence if (ret != Belos::Converged) - fancyout << std::endl << "ERROR: Belos did not converge! " << std::endl; + fancyout << std::endl + << "ERROR: Belos did not converge! " << std::endl; else - fancyout << std::endl << "SUCCESS: Belos converged!" << std::endl; + fancyout << std::endl + << "SUCCESS: Belos converged!" << std::endl; - //Clone the preconditioner to ThrustGPU node type + // Clone the preconditioner to ThrustGPU node type typedef KokkosClassic::ThrustGPUNode NO2; - typedef MueLu::Hierarchy Hierarchy2; - typedef Xpetra::MultiVector MV2; + typedef MueLu::Hierarchy Hierarchy2; + typedef Xpetra::MultiVector MV2; typedef Belos::OperatorT OP2; ParameterList plClone; plClone.set("Verbose", 1); - RCP node = rcp(new NO2(plClone)); + RCP node = rcp(new NO2(plClone)); RCP clonedH = H->clone(node); - //Clone A, X, B to new node type - RCP< Xpetra::Matrix > clonedA = Xpetra::clone(*A, node); - RCP< MV2 > clonedX = Xpetra::clone(*X, node); + // Clone A, X, B to new node type + RCP > clonedA = Xpetra::clone(*A, node); + RCP clonedX = Xpetra::clone(*X, node); clonedX->putScalar(zero); - RCP< MV2 > clonedB = Xpetra::clone(*B, node); + RCP clonedB = Xpetra::clone(*B, node); clonedH->IsPreconditioner(true); - // Define Operator and Preconditioner - Teuchos::RCP belosOp2 = Teuchos::rcp(new Belos::XpetraOp(clonedA)); // Turns a Xpetra::Matrix object into a Belos operator - Teuchos::RCP belosPrec2 = Teuchos::rcp(new Belos::MueLuOp(clonedH)); // Turns a MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp2 = 
Teuchos::rcp(new Belos::XpetraOp(clonedA)); // Turns a Xpetra::Matrix object into a Belos operator + Teuchos::RCP belosPrec2 = Teuchos::rcp(new Belos::MueLuOp(clonedH)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP< Belos::LinearProblem > belosProblem2 = rcp(new Belos::LinearProblem(belosOp2, clonedX, clonedB)); + RCP > belosProblem2 = rcp(new Belos::LinearProblem(belosOp2, clonedX, clonedB)); belosProblem2->setLeftPrec(belosPrec2); bool set2 = belosProblem2->setProblem(); @@ -393,7 +394,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } // Create an iterative solver manager - RCP< Belos::SolverManager > solver2; + RCP > solver2; if (krylovMethod == "cg") { solver2 = rcp(new Belos::BlockCGSolMgr(belosProblem2, rcp(&belosList, false))); } else if (krylovMethod == "gmres") { @@ -401,30 +402,33 @@ int main(int argc, char *argv[]) { } else { TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Invalid Krylov method. Options are \"cg\" or \" gmres\"."); } - //Perform solve + // Perform solve Belos::ReturnType ret2 = Belos::Unconverged; try { - ret2 = solver2->solve(); - // Get the number of iterations for this solve. - fancyout << "Number of iterations performed for this solve: " << solver2->getNumIters() << std::endl; - } catch(...) { - fancyout << std::endl << "ERROR: Belos threw an error! " << std::endl; + ret2 = solver2->solve(); + // Get the number of iterations for this solve. + fancyout << "Number of iterations performed for this solve: " << solver2->getNumIters() << std::endl; + } catch (...) { + fancyout << std::endl + << "ERROR: Belos threw an error! " << std::endl; } // Check convergence if (ret2 != Belos::Converged) - fancyout << std::endl << "ERROR: Belos did not converge! " << std::endl; + fancyout << std::endl + << "ERROR: Belos did not converge! " << std::endl; else - fancyout << std::endl << "SUCCESS: Belos converged!" 
<< std::endl; + fancyout << std::endl + << "SUCCESS: Belos converged!" << std::endl; - //Determine if example passed + // Determine if example passed RCP defaultNode = - rcp (new Tpetra::KokkosClassic::DefaultNode::DefaultNodeType (pl)); - RCP clonedXcpu = Xpetra::clone (*clonedX, defaultNode); - clonedXcpu->update (1.0, *X, -1.0); + rcp(new Tpetra::KokkosClassic::DefaultNode::DefaultNodeType(pl)); + RCP clonedXcpu = Xpetra::clone(*clonedX, defaultNode); + clonedXcpu->update(1.0, *X, -1.0); Scalar norm; - clonedXcpu->norm2(Teuchos::arrayView(&norm,1)); - std::cout <<"\nNorm of serial node soln - ThrustGPU node soln = " - << norm << std::endl; + clonedXcpu->norm2(Teuchos::arrayView(&norm, 1)); + std::cout << "\nNorm of serial node soln - ThrustGPU node soln = " + << norm << std::endl; bool passed = false; if (norm <= Scalar(1e-10)) @@ -434,7 +438,7 @@ int main(int argc, char *argv[]) { else std::cout << "Example Failed!" << std::endl; } - #endif //ifdef HAVE_MUELU_BELOS +#endif // ifdef HAVE_MUELU_BELOS return 0; -} //main +} // main diff --git a/packages/muelu/example/advanced/levelwrap/LevelWrap.cpp b/packages/muelu/example/advanced/levelwrap/LevelWrap.cpp index b37d0b5a7822..b44f23b7d1f9 100644 --- a/packages/muelu/example/advanced/levelwrap/LevelWrap.cpp +++ b/packages/muelu/example/advanced/levelwrap/LevelWrap.cpp @@ -55,7 +55,7 @@ #include #include -#include // For Epetra only runs this points to FakeKokkos in Xpetra +#include // For Epetra only runs this points to FakeKokkos in Xpetra #include "Xpetra_ConfigDefs.hpp" #include @@ -95,139 +95,136 @@ const std::string prefSeparator = "====================================="; namespace MueLuExamples { - template - MueLu::ParameterListInterpreter makeFactory(Teuchos::ParameterList & paramList) { - std::string paramXML = MueLu::ML2MueLuParameterTranslator::translate(paramList, ""); - paramList = *Teuchos::getParametersFromXmlString(paramXML); - return MueLu::ParameterListInterpreter(paramList); - } - - +template 
+MueLu::ParameterListInterpreter makeFactory(Teuchos::ParameterList& paramList) { + std::string paramXML = MueLu::ML2MueLuParameterTranslator::translate(paramList, ""); + paramList = *Teuchos::getParametersFromXmlString(paramXML); + return MueLu::ParameterListInterpreter(paramList); +} #ifdef HAVE_MUELU_BELOS - template - void solve_system_hierarchy(Xpetra::UnderlyingLib& lib, - Teuchos::RCP>& A, - Teuchos::RCP>& X, - Teuchos::RCP>& B, - Teuchos::RCP>& H, - Teuchos::RCP& SList) { +template +void solve_system_hierarchy(Xpetra::UnderlyingLib& lib, + Teuchos::RCP>& A, + Teuchos::RCP>& X, + Teuchos::RCP>& B, + Teuchos::RCP>& H, + Teuchos::RCP& SList) { #include "MueLu_UseShortNames.hpp" - using Teuchos::rcp; - - typedef Xpetra::MultiVector MV; - typedef Belos::OperatorT OP; - - // Construct a Belos LinearProblem object - RCP belosOp = rcp(new Belos::XpetraOp(A)); - RCP belosPrec = rcp(new Belos::MueLuOp (H)); - - RCP > belosProblem = - rcp(new Belos::LinearProblem(belosOp, X, B)); - belosProblem->setRightPrec(belosPrec); - belosProblem->setProblem(X,B); - Belos::SolverFactory BelosFactory; - RCP > BelosSolver = - BelosFactory.create(std::string("CG"), SList); - BelosSolver->setProblem(belosProblem); - Belos::ReturnType result = BelosSolver->solve(); - TEUCHOS_TEST_FOR_EXCEPTION(result == Belos::Unconverged, std::runtime_error, "Belos failed to converge"); - } + using Teuchos::rcp; + + typedef Xpetra::MultiVector MV; + typedef Belos::OperatorT OP; + + // Construct a Belos LinearProblem object + RCP belosOp = rcp(new Belos::XpetraOp(A)); + RCP belosPrec = rcp(new Belos::MueLuOp(H)); + + RCP> belosProblem = + rcp(new Belos::LinearProblem(belosOp, X, B)); + belosProblem->setRightPrec(belosPrec); + belosProblem->setProblem(X, B); + Belos::SolverFactory BelosFactory; + RCP> BelosSolver = + BelosFactory.create(std::string("CG"), SList); + BelosSolver->setProblem(belosProblem); + Belos::ReturnType result = BelosSolver->solve(); + TEUCHOS_TEST_FOR_EXCEPTION(result == 
Belos::Unconverged, std::runtime_error, "Belos failed to converge"); +} - // -------------------------------------------------------------------------------------- - template - void solve_system_list(Xpetra::UnderlyingLib& lib, - Teuchos::RCP>& A, - Teuchos::RCP>& X, - Teuchos::RCP>& B, - Teuchos::ParameterList& MueLuList, - Teuchos::RCP& SList) { +// -------------------------------------------------------------------------------------- +template +void solve_system_list(Xpetra::UnderlyingLib& lib, + Teuchos::RCP>& A, + Teuchos::RCP>& X, + Teuchos::RCP>& B, + Teuchos::ParameterList& MueLuList, + Teuchos::RCP& SList) { #include "MueLu_UseShortNames.hpp" - using Teuchos::rcp; - - if(lib == Xpetra::UseEpetra) {MueLuList.set("use kokkos refactor", false);} - Teuchos::RCP > H = - MueLu::CreateXpetraPreconditioner(A, MueLuList); + using Teuchos::rcp; - typedef Xpetra::MultiVector MV; - typedef Belos::OperatorT OP; - - // Construct a Belos LinearProblem object - RCP belosOp = rcp(new Belos::XpetraOp(A)); - RCP belosPrec = rcp(new Belos::MueLuOp (H)); - - RCP > belosProblem = - rcp(new Belos::LinearProblem(belosOp, X, B)); - belosProblem->setRightPrec(belosPrec); - belosProblem->setProblem(X,B); - - Belos::SolverFactory BelosFactory; - Teuchos::RCP > BelosSolver = BelosFactory.create(std::string("CG"), SList); - BelosSolver->setProblem(belosProblem); - Belos::ReturnType result = BelosSolver->solve(); - TEUCHOS_TEST_FOR_EXCEPTION(result == Belos::Unconverged, std::runtime_error, "Belos failed to converge"); + if (lib == Xpetra::UseEpetra) { + MueLuList.set("use kokkos refactor", false); } + Teuchos::RCP> H = + MueLu::CreateXpetraPreconditioner(A, MueLuList); + + typedef Xpetra::MultiVector MV; + typedef Belos::OperatorT OP; + + // Construct a Belos LinearProblem object + RCP belosOp = rcp(new Belos::XpetraOp(A)); + RCP belosPrec = rcp(new Belos::MueLuOp(H)); + + RCP> belosProblem = + rcp(new Belos::LinearProblem(belosOp, X, B)); + belosProblem->setRightPrec(belosPrec); + 
belosProblem->setProblem(X, B); + + Belos::SolverFactory BelosFactory; + Teuchos::RCP> BelosSolver = BelosFactory.create(std::string("CG"), SList); + BelosSolver->setProblem(belosProblem); + Belos::ReturnType result = BelosSolver->solve(); + TEUCHOS_TEST_FOR_EXCEPTION(result == Belos::Unconverged, std::runtime_error, "Belos failed to converge"); +} #endif - - // -------------------------------------------------------------------------------------- - // This routine generate's the user's original A matrix and nullspace - template - void generate_user_matrix_and_nullspace(std::string& matrixType, - Xpetra::UnderlyingLib& lib, - Teuchos::ParameterList& galeriList, - Teuchos::RCP>& comm, - Teuchos::RCP>& A, - Teuchos::RCP>& nullspace) { +// -------------------------------------------------------------------------------------- +// This routine generate's the user's original A matrix and nullspace +template +void generate_user_matrix_and_nullspace(std::string& matrixType, + Xpetra::UnderlyingLib& lib, + Teuchos::ParameterList& galeriList, + Teuchos::RCP>& comm, + Teuchos::RCP>& A, + Teuchos::RCP>& nullspace) { #include "MueLu_UseShortNames.hpp" - RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + Teuchos::FancyOStream& out = *fancy; - RCP map; - RCP coordinates; - if (matrixType == "Laplace1D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); + RCP map; + RCP coordinates; + if (matrixType == "Laplace1D") { + map = Galeri::Xpetra::CreateMap(lib, "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); - } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { - map = Galeri::Xpetra::CreateMap(lib, 
"Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { + map = Galeri::Xpetra::CreateMap(lib, "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); - } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); - } + } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { + map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + } - // Expand map to do multiple DOF per node for block problems - if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") - map = Xpetra::MapFactory::Build(map, (matrixType == "Elasticity2D" ? 2 : 3)); + // Expand map to do multiple DOF per node for block problems + if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") + map = Xpetra::MapFactory::Build(map, (matrixType == "Elasticity2D" ? 
2 : 3)); - out << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl - << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl - << "Processor subdomains in z direction: " << galeriList.get("mz") << std::endl - << "========================================================" << std::endl; + out << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl + << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl + << "Processor subdomains in z direction: " << galeriList.get("mz") << std::endl + << "========================================================" << std::endl; - RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixType, map, galeriList); + RCP> Pr = + Galeri::Xpetra::BuildProblem(matrixType, map, galeriList); - A = Pr->BuildMatrix(); + A = Pr->BuildMatrix(); - if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { - nullspace = Pr->BuildNullspace(); - A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 2 : 3); - } - else { - nullspace = Xpetra::MultiVectorFactory::Build(A->getRowMap(),1); - nullspace->putScalar(Teuchos::ScalarTraits::one()); - } + if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { + nullspace = Pr->BuildNullspace(); + A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 
2 : 3); + } else { + nullspace = Xpetra::MultiVectorFactory::Build(A->getRowMap(), 1); + nullspace->putScalar(Teuchos::ScalarTraits::one()); } } - +} // namespace MueLuExamples // -------------------------------------------------------------------------------------- -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib lib, int argc, char* argv[]) { #include using Teuchos::TimeMonitor; @@ -235,10 +232,10 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool verbose = true; try { #if defined(HAVE_MUELU_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT) - RCP > comm = Teuchos::DefaultComm::getComm(); + RCP> comm = Teuchos::DefaultComm::getComm(); RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; typedef Teuchos::ScalarTraits STS; @@ -246,28 +243,29 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Parameters initialization // ========================================================================= GO nx = 100, ny = 100, nz = 100; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: 
break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Teuchos::ParameterList galeriList = galeriParameters.GetParameterList(); - out << thickSeparator << std::endl << xpetraParameters << galeriParameters; + out << thickSeparator << std::endl + << xpetraParameters << galeriParameters; // ========================================================================= // Problem construction // ========================================================================= - RCP map; - RCP A,P,R, Ac; - RCP > nullspace; + RCP map; + RCP A, P, R, Ac; + RCP> nullspace; std::string matrixType = galeriParameters.GetMatrixType(); - MueLuExamples::generate_user_matrix_and_nullspace(matrixType,lib,galeriList,comm,A,nullspace); - map=A->getRowMap(); + MueLuExamples::generate_user_matrix_and_nullspace(matrixType, lib, galeriList, comm, A, nullspace); + map = A->getRowMap(); // ========================================================================= // Setups and solves @@ -280,54 +278,52 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg #ifdef HAVE_MUELU_BELOS // Belos Options - RCP SList = rcp(new Teuchos::ParameterList ); - SList->set("Verbosity",Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - SList->set("Output Frequency",10); - SList->set("Output Style",Belos::Brief); - SList->set("Maximum Iterations",200); - SList->set("Convergence Tolerance",1e-10); + RCP SList = rcp(new Teuchos::ParameterList); + SList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + SList->set("Output Frequency", 10); + SList->set("Output Style", Belos::Brief); + SList->set("Maximum Iterations", 200); + SList->set("Convergence Tolerance", 1e-10); #endif - // ========================================================================= // Solve #1 (standard MueLu) // ========================================================================= out << thickSeparator << std::endl; - out << prefSeparator << " Solve 1: Standard 
"<< prefSeparator < mueLuFactory(MueList); - RCP H = mueLuFactory.CreateHierarchy(); + MueLu::ParameterListInterpreter mueLuFactory(MueList); + RCP H = mueLuFactory.CreateHierarchy(); Teuchos::RCP LevelFactory = mueLuFactory.GetFactoryManager(1); H->setlib(lib); H->AddNewLevel(); - H->GetLevel(1)->Keep("Nullspace",LevelFactory->GetFactory("Nullspace").get()); + H->GetLevel(1)->Keep("Nullspace", LevelFactory->GetFactory("Nullspace").get()); H->GetLevel(0)->Set("A", A); H->GetLevel(0)->Set("Nullspace", nullspace); mueLuFactory.SetupHierarchy(*H); - #ifdef HAVE_MUELU_BELOS // Solve - MueLuExamples::solve_system_hierarchy(lib,A,X,B,H,SList); + MueLuExamples::solve_system_hierarchy(lib, A, X, B, H, SList); #endif // Extract R,P & Ac for LevelWrap Usage - H->GetLevel(1)->Get("R",R); - H->GetLevel(1)->Get("P",P); - H->GetLevel(1)->Get("A",Ac); + H->GetLevel(1)->Get("R", R); + H->GetLevel(1)->Get("P", P); + H->GetLevel(1)->Get("A", Ac); // extract coarse level null space from level 1 that we have to inject for the next runs... 
- nullspace = H->GetLevel(1)->template Get >("Nullspace",LevelFactory->GetFactory("Nullspace").get()); + nullspace = H->GetLevel(1)->template Get>("Nullspace", LevelFactory->GetFactory("Nullspace").get()); } out << thickSeparator << std::endl; @@ -335,22 +331,22 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Solve #2 (level wrap, the long way, using pre-done Ac) // ========================================================================= out << thickSeparator << std::endl; - out << prefSeparator << " Solve 2: LevelWrap, Long Way, P, R, Ac "<< prefSeparator < mueLuFactory = MueLuExamples::makeFactory(MLList); + MueLu::ParameterListInterpreter mueLuFactory = MueLuExamples::makeFactory(MLList); mueLuFactory.AddFactoryManager(1, 1, Teuchos::rcpFromRef(M1)); RCP H = mueLuFactory.CreateHierarchy(); H->setlib(lib); @@ -363,33 +359,30 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg mueLuFactory.SetupHierarchy(*H); #ifdef HAVE_MUELU_BELOS - MueLuExamples::solve_system_hierarchy(lib,A,X,B,H,SList); + MueLuExamples::solve_system_hierarchy(lib, A, X, B, H, SList); #endif - } out << thickSeparator << std::endl; - // ========================================================================= // Solve #3 (level wrap, the long way, using P, R and nullspace) // ========================================================================= out << thickSeparator << std::endl; - out << prefSeparator << " Solve 3: LevelWrap, Long Way, P, R "<< prefSeparator < mueLuFactory = MueLuExamples::makeFactory(MLList); + MueLu::ParameterListInterpreter mueLuFactory = MueLuExamples::makeFactory(MLList); mueLuFactory.AddFactoryManager(1, 1, Teuchos::rcpFromRef(M1)); RCP H = mueLuFactory.CreateHierarchy(); H->setlib(lib); @@ -400,9 +393,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg H->GetLevel(1)->Set("Nullspace", nullspace); mueLuFactory.SetupHierarchy(*H); #ifdef HAVE_MUELU_BELOS - 
MueLuExamples::solve_system_hierarchy(lib,A,X,B,H,SList); + MueLuExamples::solve_system_hierarchy(lib, A, X, B, H, SList); #endif - } out << thickSeparator << std::endl; @@ -410,19 +402,19 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Solve #4 (level wrap, the fast way, everything) // ========================================================================= out << thickSeparator << std::endl; - out << prefSeparator << " Solve 4: LevelWrap, Fast Way, P, R, Ac "<< prefSeparator < Values(2); - Values[0] = -1.0; Values[1] = -1.0; + Values[0] = -1.0; + Values[1] = -1.0; std::vector Indices(2); double two = 2.0; int NumEntries; - for (i=0; i // for Sleep +#include // for Sleep #endif - /* This driver simply generates a Tpetra matrix, prints it to screen, and exits. @@ -79,16 +78,15 @@ Use the "--help" option to get verbose help. */ -int main(int argc, char** argv) -{ +int main(int argc, char** argv) { using Teuchos::RCP; - typedef typename Tpetra::Map<>::local_ordinal_type LO; // LocalOrdinal - typedef typename Tpetra::Map<>::global_ordinal_type GO; // GlobalOrdinal - typedef MueLu::DefaultScalar SC; + typedef typename Tpetra::Map<>::local_ordinal_type LO; // LocalOrdinal + typedef typename Tpetra::Map<>::global_ordinal_type GO; // GlobalOrdinal + typedef MueLu::DefaultScalar SC; Teuchos::oblackholestream blackhole; - Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole); + Teuchos::GlobalMPISession mpiSession(&argc, &argv, &blackhole); bool success = false; bool verbose = true; @@ -101,13 +99,13 @@ int main(int argc, char** argv) // Note: use --help to list available options. 
Teuchos::CommandLineProcessor clp(false); - Galeri::Xpetra::Parameters matrixParameters(clp); // manage parameters of the test case + Galeri::Xpetra::Parameters matrixParameters(clp); // manage parameters of the test case - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } matrixParameters.check(); @@ -116,11 +114,10 @@ int main(int argc, char** argv) /**********************************************************************************/ /* CREATE INITAL MATRIX */ /**********************************************************************************/ - RCP > map = Teuchos::rcp( new Tpetra::Map(matrixParameters.GetNumGlobalElements(), 0, comm) ); - RCP,Tpetra::CrsMatrix,Tpetra::MultiVector > > problem = - Galeri::Xpetra::BuildProblem, Tpetra::CrsMatrix, Tpetra::MultiVector > - (matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); - RCP > A = problem->BuildMatrix(); + RCP > map = Teuchos::rcp(new Tpetra::Map(matrixParameters.GetNumGlobalElements(), 0, comm)); + RCP, Tpetra::CrsMatrix, Tpetra::MultiVector > > problem = + Galeri::Xpetra::BuildProblem, Tpetra::CrsMatrix, Tpetra::MultiVector >(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); + RCP > A = problem->BuildMatrix(); /**********************************************************************************/ /* */ @@ -128,7 +125,8 @@ int main(int argc, char** argv) RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); if (comm->getRank() == 0) - std::cout << "\n================ MAP 
=====================================================\n" << std::endl; + std::cout << "\n================ MAP =====================================================\n" + << std::endl; map->describe(*out, Teuchos::VERB_EXTREME); comm->barrier(); #ifdef _MSC_VER @@ -137,12 +135,13 @@ int main(int argc, char** argv) sleep(1); #endif if (comm->getRank() == 0) - std::cout << "\n================ MATRIX ==================================================\n" << std::endl; + std::cout << "\n================ MATRIX ==================================================\n" + << std::endl; A->describe(*out, Teuchos::VERB_EXTREME); success = true; } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.cpp b/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.cpp index 63d1f6dc93c7..efc50b5632c3 100644 --- a/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.cpp +++ b/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.cpp @@ -53,35 +53,34 @@ #ifdef HAVE_MUELU_GOOGLE_PERFTOOLS //#include -#include // TODO :-) +#include // TODO :-) #endif #include std::string GetMemoryUsage() { - std::ostringstream mem; #ifdef HAVE_MUELU_PROC_SELF_STATUS - //TODO: test if /proc/self/status exist on the system instead of #ifdef + // TODO: test if /proc/self/status exist on the system instead of #ifdef std::ifstream proc("/proc/self/status"); std::string s; - while(getline(proc, s), !proc.fail()) { - if(s.substr(0, 6) == "VmSize") { + while (getline(proc, s), !proc.fail()) { + if (s.substr(0, 6) == "VmSize") { mem << s; return mem.str(); } } -#endif // HAVE_MUELU_PROC_SELF_STATUS +#endif // HAVE_MUELU_PROC_SELF_STATUS return mem.str(); } void MemoryUsageStart(const std::string& autoLogPrefix) { #ifdef HAVE_MUELU_GOOGLE_PERFTOOLS - HeapProfilerStart("auto-profiling"); + 
HeapProfilerStart("auto-profiling"); #endif } @@ -98,18 +97,20 @@ void PrintMemoryUsage(const std::string& description, const std::string& filenam #ifdef HAVE_MUELU_GOOGLE_PERFTOOLS if (IsHeapProfilerRunning()) { - char* profile = GetHeapProfile(); std::istringstream iss(profile); std::string sub; - iss >> sub; iss >> sub; iss >> sub; // skip 3 first substring iss >> sub; - double MB = atof(sub.c_str()) / (1024*1024); + iss >> sub; + iss >> sub; // skip 3 first substring + iss >> sub; + double MB = atof(sub.c_str()) / (1024 * 1024); // print if (description != "") { - std::ostringstream sname; sname.precision(1); + std::ostringstream sname; + sname.precision(1); sname << description << ": " << std::fixed << MB << " MB"; std::cout << sname.str() << std::endl; } @@ -117,7 +118,10 @@ void PrintMemoryUsage(const std::string& description, const std::string& filenam // dump to file if (filename != "") { std::ofstream out(filename.c_str(), std::ios::out | std::ios::binary); - if(!out) { std::cout << "Cannot open output file: " << filename << std::endl; return; } + if (!out) { + std::cout << "Cannot open output file: " << filename << std::endl; + return; + } out.write(profile, strlen(profile)); out.close(); @@ -126,8 +130,6 @@ void PrintMemoryUsage(const std::string& description, const std::string& filenam } free(profile); - } #endif - } diff --git a/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.hpp b/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.hpp index b180ae5cb656..6ddf8fad40c9 100644 --- a/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.hpp +++ b/packages/muelu/example/advanced/memory/MueLu_MemoryProfiler.hpp @@ -54,5 +54,4 @@ void PrintMemoryUsage(const std::string& description = "Memory Usage:", const st void MemoryUsageStart(const std::string& autoLogPrefix = "memorylog-"); void MemoryUsageStop(); -#endif // MUELU_MEMORY_PROFILER_HPP - +#endif // MUELU_MEMORY_PROFILER_HPP diff --git 
a/packages/muelu/example/advanced/memory/Tpetra1DLaplace.cpp b/packages/muelu/example/advanced/memory/Tpetra1DLaplace.cpp index d356b0c3e991..e893655e348f 100644 --- a/packages/muelu/example/advanced/memory/Tpetra1DLaplace.cpp +++ b/packages/muelu/example/advanced/memory/Tpetra1DLaplace.cpp @@ -57,72 +57,72 @@ #include "MueLu_MemoryProfiler.hpp" int main(int argc, char *argv[]) { - Tpetra::ScopeGuard mpiSession(&argc,&argv); + Tpetra::ScopeGuard mpiSession(&argc, &argv); bool success = false; bool verbose = true; try { - typedef Tpetra::CrsMatrix<>::scalar_type Scalar; - typedef Tpetra::Map<>::local_ordinal_type LO; + typedef Tpetra::CrsMatrix<>::scalar_type Scalar; + typedef Tpetra::Map<>::local_ordinal_type LO; #if defined(HAVE_TPETRA_INST_INT_INT) // mfh 07 Aug 2015: Prefer GO = int, for consistency with Epetra, // but use the default GO type if GO = int is not enabled. - typedef int GO; + typedef int GO; #else - typedef Tpetra::Map::global_ordinal_type GO; -#endif // HAVE_TPETRA_INT_INT - typedef Tpetra::Map::node_type Node; - typedef Tpetra::Map Map; + typedef Tpetra::Map::global_ordinal_type GO; +#endif // HAVE_TPETRA_INT_INT + typedef Tpetra::Map::node_type Node; + typedef Tpetra::Map Map; typedef Tpetra::CrsMatrix CrsMatrix; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::tuple; RCP > comm = Tpetra::getDefaultComm(); - //const int myRank = comm->getRank(); + // const int myRank = comm->getRank(); - //int numGlobalElements = 10000000; + // int numGlobalElements = 10000000; int numGlobalElements = 100; - Teuchos::CommandLineProcessor cmdp(false,true); - cmdp.setOption("numGlobalElements",&numGlobalElements,"Global problem size."); - if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { + Teuchos::CommandLineProcessor cmdp(false, true); + cmdp.setOption("numGlobalElements", &numGlobalElements, "Global problem size."); + if (cmdp.parse(argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { return -1; } const GO 
indexBase = 0; RCP map = - rcp (new Map (static_cast (numGlobalElements), + rcp(new Map(static_cast(numGlobalElements), indexBase, comm)); - const size_t numMyElements = map->getLocalNumElements(); + const size_t numMyElements = map->getLocalNumElements(); Teuchos::ArrayView myGlobalElements = map->getLocalElementList(); MemoryUsageStart("Tpetra"); PrintMemoryUsage("Initial memory usage", "tpetra-init.heap"); - RCP A = Tpetra::createCrsMatrix(map,3); + RCP A = Tpetra::createCrsMatrix(map, 3); PrintMemoryUsage("Memory after CrsMatrix constructor", "tpetra-after-ctor.heap"); - for (size_t i=0; iinsertGlobalValues( myGlobalElements[i], - tuple( myGlobalElements[i], myGlobalElements[i]+1 ), - tuple ( 2.0, -1.0 ) ); - } else if (myGlobalElements[i] == numGlobalElements-1) { - A->insertGlobalValues( myGlobalElements[i], - tuple( myGlobalElements[i]-1, myGlobalElements[i] ), - tuple ( -1.0, 2.0 ) ); + A->insertGlobalValues(myGlobalElements[i], + tuple(myGlobalElements[i], myGlobalElements[i] + 1), + tuple(2.0, -1.0)); + } else if (myGlobalElements[i] == numGlobalElements - 1) { + A->insertGlobalValues(myGlobalElements[i], + tuple(myGlobalElements[i] - 1, myGlobalElements[i]), + tuple(-1.0, 2.0)); } else { - A->insertGlobalValues( myGlobalElements[i], - tuple( myGlobalElements[i]-1, myGlobalElements[i], myGlobalElements[i]+1 ), - tuple ( -1.0, 2.0, -1.0 ) ); + A->insertGlobalValues(myGlobalElements[i], + tuple(myGlobalElements[i] - 1, myGlobalElements[i], myGlobalElements[i] + 1), + tuple(-1.0, 2.0, -1.0)); } } PrintMemoryUsage("Memory after insertGlobalValues()", "tpetra-after-insert.heap"); - A->fillComplete(); // DoOptimizeStorage by default + A->fillComplete(); // DoOptimizeStorage by default PrintMemoryUsage("Memory after fillComplete()", "tpetra-after-fillcomplete.heap"); @@ -132,5 +132,5 @@ int main(int argc, char *argv[]) { } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/packages/muelu/example/advanced/multiplesolve/FixedMatrixPattern.cpp b/packages/muelu/example/advanced/multiplesolve/FixedMatrixPattern.cpp index d6cda610af83..9bae72e25a28 100644 --- a/packages/muelu/example/advanced/multiplesolve/FixedMatrixPattern.cpp +++ b/packages/muelu/example/advanced/multiplesolve/FixedMatrixPattern.cpp @@ -68,7 +68,7 @@ // // The resulting preconditioners are identical to multigrid preconditioners built without recycling the parts described above. // This can be verified by using the --no-recycling option. -template +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { #include @@ -79,7 +79,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool success = false; bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // // Parameters // @@ -87,7 +87,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg Galeri::Xpetra::Parameters matrixParameters(clp, 8748); Xpetra::Parameters xpetraParameters(clp); - bool optRecycling = true; clp.setOption("recycling", "no-recycling", &optRecycling, "Enable recycling of the multigrid preconditioner"); + bool optRecycling = true; + clp.setOption("recycling", "no-recycling", &optRecycling, "Enable recycling of the multigrid preconditioner"); /* DO NOT WORK YET bool optRecyclingRAPpattern = true; clp.setOption("recycling-rap-pattern", "no-recycling-rap-pattern", &optRecyclingRAPpattern, "Enable recycling of Ac=RAP pattern"); @@ -97,10 +98,10 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool optRecyclingAPpattern = false; switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case 
Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // option dependencies @@ -114,11 +115,11 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // RCP map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm); - Teuchos::RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); + Teuchos::RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); RCP A1 = Pr->BuildMatrix(); - RCP A2 = Pr->BuildMatrix(); // TODO: generate another problem would be more meaningful (ex: scale A1) + RCP A2 = Pr->BuildMatrix(); // TODO: generate another problem would be more meaningful (ex: scale A1) // // First solve @@ -142,7 +143,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // PTENT: RCP PtentFact = rcp(new TentativePFactory()); M.SetFactory("Ptent", PtentFact); - H.Keep("P", PtentFact.get()); + H.Keep("P", PtentFact.get()); } RCP AcFact = rcp(new RAPFactory()); @@ -162,8 +163,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP X = VectorFactory::Build(map); RCP B = VectorFactory::Build(map); - X->putScalar((Scalar) 0.0); - B->setSeed(846930886); B->randomize(); + X->putScalar((Scalar)0.0); + B->setSeed(846930886); + B->randomize(); int nIts = 9; H.Iterate(*B, *X, nIts); @@ -197,8 +199,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP X = VectorFactory::Build(map); RCP B = VectorFactory::Build(map); - X->putScalar((Scalar) 0.0); - B->setSeed(846930886); B->randomize(); + X->putScalar((Scalar)0.0); + B->setSeed(846930886); + B->randomize(); int nIts = 9; H.Iterate(*B, *X, nIts); @@ -215,8 +218,8 
@@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Remove kept data from the preconditioner. This will force recomputation on future runs. "Keep" flags are also removed. if (optRecycling) { - //if aggregates explicitly kept: H.Delete("Aggregates", M.GetFactory("Aggregates").get()); - H.Delete("P", M.GetFactory("Ptent").get()); + // if aggregates explicitly kept: H.Delete("Aggregates", M.GetFactory("Aggregates").get()); + H.Delete("P", M.GetFactory("Ptent").get()); } if (optRecyclingRAPpattern) { H.Delete("RAP graph", M.GetFactory("A").get()); @@ -232,16 +235,13 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? EXIT_SUCCESS : EXIT_FAILURE); } - //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } - - diff --git a/packages/muelu/example/advanced/multiplesolve/ReuseSequence.cpp b/packages/muelu/example/advanced/multiplesolve/ReuseSequence.cpp index 7d3570de6d04..784315ede482 100644 --- a/packages/muelu/example/advanced/multiplesolve/ReuseSequence.cpp +++ b/packages/muelu/example/advanced/multiplesolve/ReuseSequence.cpp @@ -74,8 +74,8 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp #endif #include @@ -95,18 +95,21 @@ // The resulting preconditioners are identical to multigrid preconditioners built without recycling the parts described above. // This can be verified by using the --no-recycling option. 
- -template +template class Tensor { -private: - typedef Scalar SC; + private: + typedef Scalar SC; typedef Teuchos::ScalarTraits STS; -public: - Tensor() : useSigmaRTC_(false), is3D_(true) { } + public: + Tensor() + : useSigmaRTC_(false) + , is3D_(true) {} #ifdef HAVE_MUELU_PAMGEN - Tensor(const std::string& rtcString, bool is3D = true) : useSigmaRTC_(true), is3D_(is3D) { + Tensor(const std::string& rtcString, bool is3D = true) + : useSigmaRTC_(true) + , is3D_(is3D) { sigmaRTC_ = Teuchos::rcp(new PG_RuntimeCompiler::Function); std::string variableType; if (TYPE_EQUAL(Scalar, float) || TYPE_EQUAL(Scalar, std::complex)) @@ -114,17 +117,17 @@ class Tensor { else variableType = "double"; - if (!sigmaRTC_->addVar(variableType, "x")) throw std::runtime_error("Error setting RTC input argument \"x\""); - if (!sigmaRTC_->addVar(variableType, "y")) throw std::runtime_error("Error setting RTC input argument \"y\""); + if (!sigmaRTC_->addVar(variableType, "x")) throw std::runtime_error("Error setting RTC input argument \"x\""); + if (!sigmaRTC_->addVar(variableType, "y")) throw std::runtime_error("Error setting RTC input argument \"y\""); if (is3D_ && - !sigmaRTC_->addVar(variableType, "z")) throw std::runtime_error("Error setting RTC input argument \"z\""); - if (!sigmaRTC_->addVar(variableType, "t")) throw std::runtime_error("Error setting RTC input argument \"t\""); - if (!sigmaRTC_->addVar(variableType, "sigmax")) throw std::runtime_error("Error setting RTC input argument \"sigmax\""); - if (!sigmaRTC_->addVar(variableType, "sigmay")) throw std::runtime_error("Error setting RTC input argument \"sigmay\""); + !sigmaRTC_->addVar(variableType, "z")) throw std::runtime_error("Error setting RTC input argument \"z\""); + if (!sigmaRTC_->addVar(variableType, "t")) throw std::runtime_error("Error setting RTC input argument \"t\""); + if (!sigmaRTC_->addVar(variableType, "sigmax")) throw std::runtime_error("Error setting RTC input argument \"sigmax\""); + if 
(!sigmaRTC_->addVar(variableType, "sigmay")) throw std::runtime_error("Error setting RTC input argument \"sigmay\""); if (is3D_ && - !sigmaRTC_->addVar(variableType, "sigmaz")) throw std::runtime_error("Error setting RTC input argument \"sigmaz\""); + !sigmaRTC_->addVar(variableType, "sigmaz")) throw std::runtime_error("Error setting RTC input argument \"sigmaz\""); - if (!sigmaRTC_->addBody(rtcString)) throw std::runtime_error("Error in RTC function compilation"); + if (!sigmaRTC_->addBody(rtcString)) throw std::runtime_error("Error in RTC function compilation"); } #endif @@ -143,11 +146,11 @@ class Tensor { is3D_ = tensor.is3D_; t_ = tensor.t_; #ifdef HAVE_MUELU_PAMGEN - sigmaRTC_ = tensor.sigmaRTC_; + sigmaRTC_ = tensor.sigmaRTC_; #endif } -private: + private: SC tensorDefault(char c, SC x, SC y, SC z) const { // isotropic tensor return STS::one(); @@ -158,15 +161,15 @@ class Tensor { SC sigmax, sigmay, sigmaz; int cnt = 0; - if (!sigmaRTC_->varValueFill(cnt++, x)) throw std::runtime_error("Could not fill \"x\""); - if (!sigmaRTC_->varValueFill(cnt++, y)) throw std::runtime_error("Could not fill \"y\""); + if (!sigmaRTC_->varValueFill(cnt++, x)) throw std::runtime_error("Could not fill \"x\""); + if (!sigmaRTC_->varValueFill(cnt++, y)) throw std::runtime_error("Could not fill \"y\""); if (is3D_ && - !sigmaRTC_->varValueFill(cnt++, z)) throw std::runtime_error("Could not fill \"z\""); - if (!sigmaRTC_->varValueFill(cnt++, t_)) throw std::runtime_error("Could not fill \"t\""); - if (!sigmaRTC_->varAddrFill (cnt++, &sigmax)) throw std::runtime_error("Could not fill \"sigmax\""); - if (!sigmaRTC_->varAddrFill (cnt++, &sigmay)) throw std::runtime_error("Could not fill \"sigmay\""); + !sigmaRTC_->varValueFill(cnt++, z)) throw std::runtime_error("Could not fill \"z\""); + if (!sigmaRTC_->varValueFill(cnt++, t_)) throw std::runtime_error("Could not fill \"t\""); + if (!sigmaRTC_->varAddrFill(cnt++, &sigmax)) throw std::runtime_error("Could not fill \"sigmax\""); + if 
(!sigmaRTC_->varAddrFill(cnt++, &sigmay)) throw std::runtime_error("Could not fill \"sigmay\""); if (is3D_ && - !sigmaRTC_->varAddrFill (cnt++, &sigmaz)) throw std::runtime_error("Could not fill \"sigmaz\""); + !sigmaRTC_->varAddrFill(cnt++, &sigmaz)) throw std::runtime_error("Could not fill \"sigmaz\""); sigmaRTC_->execute(); @@ -179,60 +182,60 @@ class Tensor { } #endif -private: - bool useSigmaRTC_; - bool is3D_; + private: + bool useSigmaRTC_; + bool is3D_; double t_; #ifdef HAVE_MUELU_PAMGEN - mutable - Teuchos::RCP sigmaRTC_; + mutable Teuchos::RCP sigmaRTC_; #endif }; -template +template Teuchos::RCP BuildMatrix(bool is3D, const Tensor::magnitudeType>& tensor, Teuchos::ParameterList& list, const Teuchos::RCP& map, const Teuchos::RCP& coords) { typedef GlobalOrdinal GO; - typedef LocalOrdinal LO; - typedef Scalar SC; - using Teuchos::ArrayView; + typedef LocalOrdinal LO; + typedef Scalar SC; using Teuchos::ArrayRCP; + using Teuchos::ArrayView; typedef typename MultiVector::scalar_type Real; - GO nx = list.get("nx", (GO) -1); - GO ny = list.get("ny", (GO) -1); + GO nx = list.get("nx", (GO)-1); + GO ny = list.get("ny", (GO)-1); GO nz = -1; if (is3D) { // 3D - nz = list.get("nz", (GO) -1); + nz = list.get("nz", (GO)-1); if (nx == -1 || ny == -1 || nz == -1) { GO n = map->getGlobalNumElements(); - nx = (GO) Teuchos::ScalarTraits::pow(n, 0.33334); - ny = nx; nz = nx; + nx = (GO)Teuchos::ScalarTraits::pow(n, 0.33334); + ny = nx; + nz = nx; TEUCHOS_TEST_FOR_EXCEPTION(nx * ny * nz != n, std::logic_error, "You need to specify nx, ny, and nz"); } } else { // 2D if (nx == -1 || ny == -1) { GO n = map->getGlobalNumElements(); - nx = (GO) Teuchos::ScalarTraits::pow(n, 0.5); - ny = nx; + nx = (GO)Teuchos::ScalarTraits::pow(n, 0.5); + ny = nx; TEUCHOS_TEST_FOR_EXCEPTION(nx * ny != n, std::logic_error, "You need to specify nx, ny, and nz"); } } - double one = 1.0; - SC stretchx = list.get("stretchx", one); - SC stretchy = list.get("stretchy", one); - SC stretchz = 
list.get("stretchz", one); + double one = 1.0; + SC stretchx = list.get("stretchx", one); + SC stretchy = list.get("stretchy", one); + SC stretchz = list.get("stretchz", one); // bool keepBCs = list.get("keepBCs", false); LO nnz = (is3D ? 7 : 5); - Teuchos::RCP A = Galeri::Xpetra::MatrixTraits::Build(map, nnz); + Teuchos::RCP A = Galeri::Xpetra::MatrixTraits::Build(map, nnz); LO numMyElements = map->getLocalNumElements(); GO indexBase = map->getIndexBase(); @@ -262,24 +265,42 @@ Teuchos::RCP BuildMatrix(bool is3D, const Tensor BuildMatrix(bool is3D, const Tensor BuildMatrix(bool is3D, const Tensor +template void ConstructData(bool is3D, const Tensor::magnitudeType>& tensor, const std::string& matrixType, Teuchos::ParameterList& galeriList, - Xpetra::UnderlyingLib lib, Teuchos::RCP >& comm, - Teuchos::RCP >& A, - Teuchos::RCP >& map, - Teuchos::RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> >& coordinates, - Teuchos::RCP >& nullspace) { + Xpetra::UnderlyingLib lib, Teuchos::RCP>& comm, + Teuchos::RCP>& A, + Teuchos::RCP>& map, + Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>>& coordinates, + Teuchos::RCP>& nullspace) { #include + using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; typedef typename Teuchos::ScalarTraits::magnitudeType real_type; - typedef typename Xpetra::MultiVector RealValuedMultiVector; - + typedef typename Xpetra::MultiVector RealValuedMultiVector; if (is3D) { // 3D map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); } else { // 2D map = Galeri::Xpetra::CreateMap(lib, "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", 
map, galeriList); } - A = BuildMatrix(is3D, tensor, galeriList, map, coordinates); + A = BuildMatrix(is3D, tensor, galeriList, map, coordinates); } -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int argc, char* argv[]) { #include + using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; using namespace std::chrono; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= - RCP > comm = Teuchos::DefaultComm::getComm(); + RCP> comm = Teuchos::DefaultComm::getComm(); // ========================================================================= // Convenient definitions @@ -384,28 +416,33 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a SC one = STS::one(), zero = STS::zero(); RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= GO nx = 20, ny = 20, nz = 20; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace3D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = "reuse_seq.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file"); - std::string solveType = "cg"; clp.setOption("solver", &solveType, "solve type: (none | cg | standalone)"); - typename Teuchos::ScalarTraits::magnitudeType tol = 1e-6; clp.setOption("tol", &tol, "solver convergence tolerance"); - int maxIts = 200; 
clp.setOption("its", &maxIts, "maximum number of solver iterations"); - int dim = 3; clp.setOption("dim", &dim, "space dimension"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace3D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = "reuse_seq.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file"); + std::string solveType = "cg"; + clp.setOption("solver", &solveType, "solve type: (none | cg | standalone)"); + typename Teuchos::ScalarTraits::magnitudeType tol = 1e-6; + clp.setOption("tol", &tol, "solver convergence tolerance"); + int maxIts = 200; + clp.setOption("its", &maxIts, "maximum number of solver iterations"); + int dim = 3; + clp.setOption("dim", &dim, "space dimension"); clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } bool is3D = (dim == 3); @@ -418,7 +455,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a // Problem construction // ========================================================================= // For comments, see Driver.cpp - out << "========================================================\n" << xpetraParameters << galeriParameters; + out << "========================================================\n" + << xpetraParameters << galeriParameters; std::string matrixType = galeriParameters.GetMatrixType(); out << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl @@ -438,7 +476,7 @@ int main_(Teuchos::CommandLineProcessor &clp, 
Xpetra::UnderlyingLib &lib, int a Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *comm); typedef typename Teuchos::ScalarTraits::magnitudeType real_type; - typedef typename Xpetra::MultiVector RealValuedMultiVector; + typedef typename Xpetra::MultiVector RealValuedMultiVector; Tensor tensor; if (paramList.isParameter("sigma")) { std::string sigmaString = paramList.get("sigma"); @@ -447,38 +485,44 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a out << "Switching to RTC" << std::endl; tensor = Tensor(sigmaString, is3D); #else - (void)sigmaString; // fix compiler warning + (void)sigmaString; // fix compiler warning #endif } - out << "Parameter list:" << std::endl << paramList << std::endl; + out << "Parameter list:" << std::endl + << paramList << std::endl; // ========================================================================= // The LOOP // ========================================================================= std::vector reuseTypes, reuseNames; - reuseTypes.push_back("none"); reuseNames.push_back("none"); - reuseTypes.push_back("S"); reuseNames.push_back("smoothers"); - reuseTypes.push_back("tP"); reuseNames.push_back("tentative P"); - reuseTypes.push_back("RP"); reuseNames.push_back("smoothed P and R"); - reuseTypes.push_back("RAP"); reuseNames.push_back("coarse grids"); + reuseTypes.push_back("none"); + reuseNames.push_back("none"); + reuseTypes.push_back("S"); + reuseNames.push_back("smoothers"); + reuseTypes.push_back("tP"); + reuseNames.push_back("tentative P"); + reuseTypes.push_back("RP"); + reuseNames.push_back("smoothed P and R"); + reuseTypes.push_back("RAP"); + reuseNames.push_back("coarse grids"); const size_t numSteps = 8; high_resolution_clock::time_point tc; - std::vector> setup_time(reuseTypes.size()*numSteps); - std::vector> solve_time(reuseTypes.size()*numSteps); - std::vector num_its (reuseTypes.size()*numSteps); + std::vector> setup_time(reuseTypes.size() * 
numSteps); + std::vector> solve_time(reuseTypes.size() * numSteps); + std::vector num_its(reuseTypes.size() * numSteps); for (size_t k = 0; k < reuseTypes.size(); k++) { out << thickSeparator << " " << reuseTypes[k] << " " << thickSeparator << std::endl; paramList.set("reuse: type", reuseTypes[k]); - RCP A; - RCP map; + RCP A; + RCP map; RCP coordinates; - RCP nullspace; + RCP nullspace; tensor.setT(0); @@ -492,7 +536,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a A->apply(*X, *B); Teuchos::ParameterList userParamList = paramList.sublist("user data"); - userParamList.set >("Coordinates", coordinates); + userParamList.set>("Coordinates", coordinates); RCP H = MueLu::CreateXpetraPreconditioner(A, paramList); for (size_t t = 1; t < numSteps; t++) { @@ -508,7 +552,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a H = MueLu::CreateXpetraPreconditioner(A, paramList); else MueLu::ReuseXpetraPreconditioner(A, H); - setup_time[k*numSteps + t] = duration_cast>(high_resolution_clock::now() - tc); + setup_time[k * numSteps + t] = duration_cast>(high_resolution_clock::now() - tc); X->putScalar(zero); @@ -520,22 +564,22 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a tc = high_resolution_clock::now(); H->Iterate(*B, *X, tol); - solve_time[k*numSteps + t] = duration_cast>(high_resolution_clock::now() - tc); + solve_time[k * numSteps + t] = duration_cast>(high_resolution_clock::now() - tc); } else if (solveType == "cg" || solveType == "gmres") { H->IsPreconditioner(true); #ifdef HAVE_MUELU_BELOS // Operator and Multivector type that will be used with Belos - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; // Define Operator and Preconditioner - Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator - Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp (H)); // Turns a 
MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator + Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); + RCP> belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); belosProblem->setRightPrec(belosPrec); bool set = belosProblem->setProblem(); @@ -546,16 +590,16 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a // Belos parameter list Teuchos::ParameterList belosList; - belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested - belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList.set("Output Frequency", 1); - belosList.set("Output Style", Belos::Brief); + belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested + belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", Belos::Brief); // Create an iterative solver manager - RCP< Belos::SolverManager > solver; + RCP> solver; if (solveType == "cg") { - solver = rcp(new Belos::PseudoBlockCGSolMgr (belosProblem, rcp(&belosList, false))); + solver = rcp(new Belos::PseudoBlockCGSolMgr(belosProblem, rcp(&belosList, false))); } else if (solveType == "gmres") { solver = rcp(new Belos::BlockGmresSolMgr(belosProblem, rcp(&belosList, false))); } @@ -563,22 +607,24 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a // Perform solve Belos::ReturnType ret = Belos::Unconverged; - tc = 
high_resolution_clock::now(); - ret = solver->solve(); - solve_time[k*numSteps + t] = duration_cast>(high_resolution_clock::now() - tc); + tc = high_resolution_clock::now(); + ret = solver->solve(); + solve_time[k * numSteps + t] = duration_cast>(high_resolution_clock::now() - tc); // Get the number of iterations for this solve. out << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; // Check convergence if (ret != Belos::Converged) { - out << std::endl << "ERROR: Belos did not converge! " << std::endl; - num_its[k*numSteps+t] = -1; + out << std::endl + << "ERROR: Belos did not converge! " << std::endl; + num_its[k * numSteps + t] = -1; } else { - out << std::endl << "SUCCESS: Belos converged!" << std::endl; - num_its[k*numSteps+t] = solver->getNumIters(); + out << std::endl + << "SUCCESS: Belos converged!" << std::endl; + num_its[k * numSteps + t] = solver->getNumIters(); } -#endif //ifdef HAVE_MUELU_BELOS +#endif // ifdef HAVE_MUELU_BELOS } else { throw MueLu::Exceptions::RuntimeError("Unknown solver type: \"" + solveType + "\""); } @@ -590,18 +636,17 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int a out << thinSeparator << std::endl; for (size_t k = 0; k < reuseTypes.size(); k++) printf("step #%d reuse \"%20s\": setup = %5.2e, solve = %5.2e [%3d], total = %5.2e\n", static_cast(t), reuseNames[k].c_str(), - setup_time[k*numSteps+t].count(), solve_time[k*numSteps+t].count(), num_its[k*numSteps+t], - setup_time[k*numSteps+t].count() + solve_time[k*numSteps+t].count()); + setup_time[k * numSteps + t].count(), solve_time[k * numSteps + t].count(), num_its[k * numSteps + t], + setup_time[k * numSteps + t].count() + solve_time[k * numSteps + t].count()); } return EXIT_SUCCESS; } - //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" -int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); +int 
main(int argc, char* argv[]) { + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/example/advanced/multiplesolve/StandardReuse.cpp b/packages/muelu/example/advanced/multiplesolve/StandardReuse.cpp index 1e17f6d41337..8ff37de279ee 100644 --- a/packages/muelu/example/advanced/multiplesolve/StandardReuse.cpp +++ b/packages/muelu/example/advanced/multiplesolve/StandardReuse.cpp @@ -79,21 +79,20 @@ // The resulting preconditioners are identical to multigrid preconditioners built without recycling the parts described above. // This can be verified by using the --no-recycling option. -template +template void ConstructData(const std::string& matrixType, Teuchos::ParameterList& galeriList, Xpetra::UnderlyingLib lib, Teuchos::RCP >& comm, - Teuchos::RCP >& A, - Teuchos::RCP >& map, - Teuchos::RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> >& coordinates, - Teuchos::RCP >& nullspace) { + Teuchos::RCP >& A, + Teuchos::RCP >& map, + Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> >& coordinates, + Teuchos::RCP >& nullspace) { #include + using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; typedef typename Teuchos::ScalarTraits::magnitudeType real_type; - typedef typename Xpetra::MultiVector RealValuedMultiVector; - + typedef typename Xpetra::MultiVector RealValuedMultiVector; // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g., // d1 d2 d3 @@ -109,36 +108,36 @@ void ConstructData(const std::string& matrixType, Teuchos::ParameterList& galeri // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside if (matrixType == "Laplace1D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian1D", comm, galeriList); - coordinates = 
Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); + map = Galeri::Xpetra::CreateMap(lib, "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + map = Galeri::Xpetra::CreateMap(lib, "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { - map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + map = Galeri::Xpetra::CreateMap(lib, "Cartesian3D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); } // Expand map to do multiple DOF per node for block problems if (matrixType == "Elasticity2D") - map = Xpetra::MapFactory::Build(map, 2); + map = Xpetra::MapFactory::Build(map, 2); if (matrixType == "Elasticity3D") - map = Xpetra::MapFactory::Build(map, 3); + map = Xpetra::MapFactory::Build(map, 3); if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. 
except left which has Dirichlet - galeriList.set("right boundary" , "Neumann"); + galeriList.set("right boundary", "Neumann"); galeriList.set("bottom boundary", "Neumann"); - galeriList.set("top boundary" , "Neumann"); - galeriList.set("front boundary" , "Neumann"); - galeriList.set("back boundary" , "Neumann"); + galeriList.set("top boundary", "Neumann"); + galeriList.set("front boundary", "Neumann"); + galeriList.set("back boundary", "Neumann"); } - RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixType, map, galeriList); + RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixType, map, galeriList); A = Pr->BuildMatrix(); if (matrixType == "Elasticity2D" || @@ -148,16 +147,16 @@ void ConstructData(const std::string& matrixType, Teuchos::ParameterList& galeri } } -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int argc, char* argv[]) { #include + using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; typedef typename Teuchos::ScalarTraits::magnitudeType real_type; - typedef typename Xpetra::MultiVector RealValuedMultiVector; + typedef typename Xpetra::MultiVector RealValuedMultiVector; // ========================================================================= // MPI initialization using Teuchos @@ -171,27 +170,31 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar SC one = STS::one(), zero = STS::zero(); RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= GO nx = 100, ny = 100, nz = 100; - Galeri::Xpetra::Parameters 
galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = ""; clp.setOption("xml", &xmlFileName, "read parameters from a file"); - int numRebuilds = 0; clp.setOption("rebuild", &numRebuilds, "#times to rebuild hierarchy"); - bool useFilter = true; clp.setOption("filter", "nofilter", &useFilter, "Print out only Setup times"); - bool modify = true; clp.setOption("modify", "nomodify", &modify, "Change values of the matrix used for reuse"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = ""; + clp.setOption("xml", &xmlFileName, "read parameters from a file"); + int numRebuilds = 0; + clp.setOption("rebuild", &numRebuilds, "#times to rebuild hierarchy"); + bool useFilter = true; + clp.setOption("filter", "nofilter", &useFilter, "Print out only Setup times"); + bool modify = true; + clp.setOption("modify", "nomodify", &modify, "Change values of the matrix used for reuse"); clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // Retrieve matrix parameters (they may have been changed on the command line) @@ -202,12 +205,13 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar // Problem construction // ========================================================================= // For comments, see Driver.cpp - out << 
"========================================================\n" << xpetraParameters << galeriParameters; + out << "========================================================\n" + << xpetraParameters << galeriParameters; std::string matrixType = galeriParameters.GetMatrixType(); - RCP A, B; - RCP map; - RCP coordinates; - RCP nullspace; + RCP A, B; + RCP map; + RCP coordinates; + RCP nullspace; ConstructData(matrixType, galeriList, lib, comm, A, map, coordinates, nullspace); if (modify) { @@ -237,7 +241,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar Teuchos::ParameterList paramList; paramList.set("verbosity", "none"); - if(lib == Xpetra::UseEpetra) { + if (lib == Xpetra::UseEpetra) { out << "Setting: \"use kokkos refactor\" to: false" << std::endl; paramList.set("use kokkos refactor", false); } @@ -289,10 +293,14 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar // Setup #2-inf (reuse) // ========================================================================= std::vector reuseTypes, reuseNames; - reuseTypes.push_back("none"); reuseNames.push_back("none"); - reuseTypes.push_back("S"); reuseNames.push_back("smoothers"); - reuseTypes.push_back("tP"); reuseNames.push_back("tentative P"); - reuseTypes.push_back("RP"); reuseNames.push_back("smoothed P and R"); + reuseTypes.push_back("none"); + reuseNames.push_back("none"); + reuseTypes.push_back("S"); + reuseNames.push_back("smoothers"); + reuseTypes.push_back("tP"); + reuseNames.push_back("tentative P"); + reuseTypes.push_back("RP"); + reuseNames.push_back("smoothed P and R"); for (size_t k = 0; k < reuseTypes.size(); k++) { out << thickSeparator << " " << reuseTypes[k] << " " << thickSeparator << std::endl; @@ -310,7 +318,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar // Reuse setup RCP Bcopy = Xpetra::MatrixFactory2::BuildCopy(B); - RCP tm = TimeMonitor::getNewTimer("Setup #" + MueLu::toString(k+2) + ": reuse " 
+ reuseNames[k]); + RCP tm = TimeMonitor::getNewTimer("Setup #" + MueLu::toString(k + 2) + ": reuse " + reuseNames[k]); for (int i = 0; i <= numRebuilds; i++) { out << thinSeparator << " " << reuseTypes[k] << " (rebuild #" << i << ") " << thinSeparator << std::endl; @@ -350,11 +358,10 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar return EXIT_SUCCESS; } - //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" -int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); +int main(int argc, char* argv[]) { + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/example/advanced/separatesetups/SeparateSetups.cpp b/packages/muelu/example/advanced/separatesetups/SeparateSetups.cpp index c4e0bef32ade..3935c3f75a10 100644 --- a/packages/muelu/example/advanced/separatesetups/SeparateSetups.cpp +++ b/packages/muelu/example/advanced/separatesetups/SeparateSetups.cpp @@ -72,7 +72,7 @@ #include #include -template +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { #include @@ -88,19 +88,19 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool success = false; bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // // Process command line arguments // - Galeri::Xpetra::Parameters matrixParameters(clp, 81); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra + Galeri::Xpetra::Parameters matrixParameters(clp, 81); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + switch (clp.parse(argc, argv)) { + case 
Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; default:; } @@ -114,46 +114,47 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm); // Matrix - RCP > Pr = - Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); + RCP > Pr = + Galeri::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); RCP A = Pr->BuildMatrix(); // User defined nullspace - RCP nullSpace = VectorFactory::Build(map,1); nullSpace->putScalar((SC) 1.0); + RCP nullSpace = VectorFactory::Build(map, 1); + nullSpace->putScalar((SC)1.0); // Define B - RCP X = VectorFactory::Build(map,1); - RCP B = VectorFactory::Build(map,1); + RCP X = VectorFactory::Build(map, 1); + RCP B = VectorFactory::Build(map, 1); X->setSeed(846930886); X->randomize(); A->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0); // X = 0 - X->putScalar((SC) 0.0); + X->putScalar((SC)0.0); // // Create a multigrid configuration // // Transfer operators - RCP TentativePFact = rcp( new TentativePFactory() ); - RCP SaPFact = rcp( new SaPFactory() ); - RCP RFact = rcp( new TransPFactory()); + RCP TentativePFact = rcp(new TentativePFactory()); + RCP SaPFact = rcp(new SaPFactory()); + RCP RFact = rcp(new TransPFactory()); FactoryManager M; M.SetFactory("Ptent", TentativePFact); - M.SetFactory("P", SaPFact); - M.SetFactory("R", RFact); + M.SetFactory("P", SaPFact); + M.SetFactory("R", RFact); - M.SetFactory("Smoother", Teuchos::null); //skips smoother setup - M.SetFactory("CoarseSolver", Teuchos::null); //skips coarsest solve setup + M.SetFactory("Smoother", 
Teuchos::null); // skips smoother setup + M.SetFactory("CoarseSolver", Teuchos::null); // skips coarsest solve setup // // Multigrid setup phase // int startLevel = 0; - int maxLevels = 10; + int maxLevels = 10; std::cout << "=============== Setup transfers only ====================" << std::endl; @@ -165,11 +166,11 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg finestLevel->Set("Nullspace", nullSpace); // Indicate which Hierarchy operators we want to keep - H.Keep("P", SaPFact.get()); //SaPFact is the generating factory for P. - H.Keep("R", RFact.get()); //RFact is the generating factory for R. - H.Keep("Ptent", TentativePFact.get()); //SaPFact is the generating factory for P. + H.Keep("P", SaPFact.get()); // SaPFact is the generating factory for P. + H.Keep("R", RFact.get()); // RFact is the generating factory for R. + H.Keep("Ptent", TentativePFact.get()); // SaPFact is the generating factory for P. - H.Setup(M,startLevel,maxLevels); + H.Setup(M, startLevel, maxLevels); std::cout << "=============== Setup smoothers only ====================" << std::endl; @@ -180,19 +181,19 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Create Gauss-Seidel smoother. std::string ifpackType = "RELAXATION"; Teuchos::ParameterList ifpackList; - ifpackList.set("relaxation: sweeps", (LO) 3); - ifpackList.set("relaxation: damping factor", (SC) 1.0); + ifpackList.set("relaxation: sweeps", (LO)3); + ifpackList.set("relaxation: damping factor", (SC)1.0); RCP smootherPrototype = rcp(new TrilinosSmoother(ifpackType, ifpackList)); M.SetFactory("Smoother", rcp(new SmootherFactory(smootherPrototype))); // Create coarsest solver. 
- RCP coarseSolverPrototype = rcp( new DirectSolver() ); - RCP coarseSolverFact = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) ); + RCP coarseSolverPrototype = rcp(new DirectSolver()); + RCP coarseSolverFact = rcp(new SmootherFactory(coarseSolverPrototype, Teuchos::null)); M.SetFactory("CoarseSolver", coarseSolverFact); // Note that we pass the number of levels back in. - H.Setup(M,startLevel, H.GetNumLevels()); + H.Setup(M, startLevel, H.GetNumLevels()); std::cout << "=============== Solve ====================" << std::endl; @@ -211,23 +212,20 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg if (comm->getRank() == 0) { std::ios::fmtflags f(std::cout.flags()); std::cout << "||Residual|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(20) << residualNorms << std::endl; - std::cout.flags(f); + std::cout.flags(f); } success = true; } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } - //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } - - diff --git a/packages/muelu/example/basic/Simple.cpp b/packages/muelu/example/basic/Simple.cpp index d8dc0fb308e3..6a16c8650772 100644 --- a/packages/muelu/example/basic/Simple.cpp +++ b/packages/muelu/example/basic/Simple.cpp @@ -77,17 +77,16 @@ #include #include - /*********************************************************************/ -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int argc, char *argv[]) { #include + using Teuchos::ArrayRCP; + using Teuchos::ParameterList; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; - using Teuchos::ParameterList; // ========================================================================= // MPI initialization using Teuchos @@ -99,45 +98,68 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // ========================================================================= typedef Teuchos::ScalarTraits STS; typedef typename STS::coordinateType real_type; - typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Xpetra::MultiVector RealValuedMultiVector; // ========================================================================= // Parameters initialization // ========================================================================= GO nx = 100, ny = 100, nz = 100; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = ""; clp.setOption("xml", 
&xmlFileName, "read parameters from an xml file"); - std::string yamlFileName = ""; clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); - bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); - std::string timingsFormat = "table-fixed"; clp.setOption("time-format", &timingsFormat, "timings format (table-fixed | table-scientific | yaml)"); - std::string solveType = "belos"; clp.setOption("solver", &solveType, "solve type: (none | belos)"); - std::string belosType = "cg"; clp.setOption("belosType", &belosType, "belos solver type: (Pseudoblock CG | Block CG | Pseudoblock GMRES | Block GMRES | ...) see BelosSolverFactory.hpp for exhaustive list of solvers"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - bool binaryFormat = false; clp.setOption("binary", "ascii", &binaryFormat, "read matrices in binary format"); - std::string rowMapFile; clp.setOption("rowmap", &rowMapFile, "map data file"); - std::string colMapFile; clp.setOption("colmap", &colMapFile, "colmap data file"); - std::string domainMapFile; clp.setOption("domainmap", &domainMapFile, "domainmap data file"); - std::string rangeMapFile; clp.setOption("rangemap", &rangeMapFile, "rangemap data file"); - std::string matrixFile; clp.setOption("matrix", &matrixFile, "matrix data file"); - std::string rhsFile; clp.setOption("rhs", &rhsFile, "rhs data file"); - std::string coordFile; clp.setOption("coords", &coordFile, "coordinates data file"); - std::string coordMapFile; clp.setOption("coordsmap", &coordMapFile, "coordinates map data file"); - std::string nullFile; clp.setOption("nullspace", &nullFile, "nullspace data file"); - std::string materialFile; clp.setOption("material", &materialFile, "material data file"); - int maxIts = 200; clp.setOption("its", &maxIts, "maximum number of solver iterations"); - int numVectors = 1; clp.setOption("multivector", &numVectors, "number of rhs to solve 
simultaneously"); - bool scaleResidualHist = true; clp.setOption("scale", "noscale", &scaleResidualHist, "scaled Krylov residual history"); - bool solvePreconditioned = true; clp.setOption("solve-preconditioned","no-solve-preconditioned", &solvePreconditioned, "use MueLu preconditioner in solve"); - int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = ""; + clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); + std::string yamlFileName = ""; + clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); + bool printTimings = true; + clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); + std::string timingsFormat = "table-fixed"; + clp.setOption("time-format", &timingsFormat, "timings format (table-fixed | table-scientific | yaml)"); + std::string solveType = "belos"; + clp.setOption("solver", &solveType, "solve type: (none | belos)"); + std::string belosType = "cg"; + clp.setOption("belosType", &belosType, "belos solver type: (Pseudoblock CG | Block CG | Pseudoblock GMRES | Block GMRES | ...) 
see BelosSolverFactory.hpp for exhaustive list of solvers"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + bool binaryFormat = false; + clp.setOption("binary", "ascii", &binaryFormat, "read matrices in binary format"); + std::string rowMapFile; + clp.setOption("rowmap", &rowMapFile, "map data file"); + std::string colMapFile; + clp.setOption("colmap", &colMapFile, "colmap data file"); + std::string domainMapFile; + clp.setOption("domainmap", &domainMapFile, "domainmap data file"); + std::string rangeMapFile; + clp.setOption("rangemap", &rangeMapFile, "rangemap data file"); + std::string matrixFile; + clp.setOption("matrix", &matrixFile, "matrix data file"); + std::string rhsFile; + clp.setOption("rhs", &rhsFile, "rhs data file"); + std::string coordFile; + clp.setOption("coords", &coordFile, "coordinates data file"); + std::string coordMapFile; + clp.setOption("coordsmap", &coordMapFile, "coordinates map data file"); + std::string nullFile; + clp.setOption("nullspace", &nullFile, "nullspace data file"); + std::string materialFile; + clp.setOption("material", &materialFile, "material data file"); + int maxIts = 200; + clp.setOption("its", &maxIts, "maximum number of solver iterations"); + int numVectors = 1; + clp.setOption("multivector", &numVectors, "number of rhs to solve simultaneously"); + bool scaleResidualHist = true; + clp.setOption("scale", "noscale", &scaleResidualHist, "scaled Krylov residual history"); + bool solvePreconditioned = true; + clp.setOption("solve-preconditioned", "no-solve-preconditioned", &solvePreconditioned, "use MueLu preconditioner in solve"); + int cacheSize = 0; + clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case 
Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } TEUCHOS_TEST_FOR_EXCEPTION(xmlFileName != "" && yamlFileName != "", std::runtime_error, @@ -145,7 +167,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream &out = *fancy; out.setOutputToRootOnly(0); ParameterList paramList; @@ -160,7 +182,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar if (inst == Xpetra::COMPLEX_INT_INT && solveType == "belos") { belosType = "gmres"; - out << "WARNING: CG will not work with COMPLEX scalars, switching to GMRES"< globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: S - Global Time"))); RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1 - Matrix Build"))); - RCP A; - RCP map; + RCP A; + RCP map; RCP coordinates; RCP nullspace; RCP material; RCP X, B; // Load the matrix off disk (or generate it via Galeri) - MatrixLoad(comm,lib,binaryFormat,matrixFile,rhsFile,rowMapFile,colMapFile,domainMapFile,rangeMapFile,coordFile,coordMapFile,nullFile,materialFile,map,A,coordinates,nullspace,material,X,B,numVectors,galeriParameters,xpetraParameters,galeriStream); + MatrixLoad(comm, lib, binaryFormat, matrixFile, rhsFile, rowMapFile, colMapFile, domainMapFile, rangeMapFile, coordFile, coordMapFile, nullFile, materialFile, map, A, coordinates, nullspace, material, X, B, numVectors, galeriParameters, xpetraParameters, galeriStream); comm->barrier(); tm = Teuchos::null; out << galeriStream.str(); @@ -193,17 +215,17 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // 
========================================================================= // Preconditioner construction // ========================================================================= - bool useML = paramList.isParameter("use external multigrid package") && (paramList.get("use external multigrid package") == "ml"); - out<<"*********** MueLu ParameterList ***********"<("use external multigrid package") == "ml"); + out << "*********** MueLu ParameterList ***********" << std::endl; + out << paramList; + out << "*******************************************" << std::endl; RCP H; RCP Prec; { comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 2 - MueLu Setup"))); - PreconditionerSetup(A,coordinates,nullspace,material,paramList,false,false,useML,false,0,H,Prec); + PreconditionerSetup(A, coordinates, nullspace, material, paramList, false, false, useML, false, 0, H, Prec); comm->barrier(); tm = Teuchos::null; } @@ -213,29 +235,31 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // ========================================================================= { comm->barrier(); - SystemSolve(A,X,B,H,Prec,out,solveType,belosType,false,false,useML,cacheSize,0,scaleResidualHist,solvePreconditioned,maxIts,tol); + SystemSolve(A, X, B, H, Prec, out, solveType, belosType, false, false, useML, cacheSize, 0, scaleResidualHist, solvePreconditioned, maxIts, tol); comm->barrier(); } - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings) { RCP reportParams = rcp(new ParameterList); if (timingsFormat == "yaml") { - reportParams->set("Report format", "YAML"); // "Table" or "YAML" - reportParams->set("YAML style", "compact"); // "spacious" or "compact" + reportParams->set("Report format", "YAML"); // "Table" or "YAML" + reportParams->set("YAML style", "compact"); // "spacious" or "compact" } - reportParams->set("How to merge timer sets", "Union"); - reportParams->set("alwaysWriteLocal", false); - 
reportParams->set("writeGlobalStats", true); - reportParams->set("writeZeroTimers", false); + reportParams->set("How to merge timer sets", "Union"); + reportParams->set("alwaysWriteLocal", false); + reportParams->set("writeGlobalStats", true); + reportParams->set("writeZeroTimers", false); const std::string filter = ""; std::ios_base::fmtflags ff(out.flags()); - if (timingsFormat == "table-fixed") out << std::fixed; - else out << std::scientific; + if (timingsFormat == "table-fixed") + out << std::fixed; + else + out << std::scientific; TimeMonitor::report(comm.ptr(), out, filter, reportParams); out << std::setiosflags(ff); } @@ -254,5 +278,5 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/example/basic/Stratimikos.cpp b/packages/muelu/example/basic/Stratimikos.cpp index 998d5039194b..b7e242743250 100644 --- a/packages/muelu/example/basic/Stratimikos.cpp +++ b/packages/muelu/example/basic/Stratimikos.cpp @@ -82,26 +82,24 @@ The source code is not MueLu specific and can be used with any Stratimikos strat // Galeri includes #include - -template +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { - #include +#include typedef Teuchos::ScalarTraits STS; typedef typename STS::coordinateType real_type; - typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Xpetra::MultiVector RealValuedMultiVector; + using Teuchos::ParameterList; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ParameterList; using Teuchos::TimeMonitor; bool success = false; bool verbose = true; try { - // // MPI initialization // - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // // Parameters @@ -109,37 +107,55 @@ int main_(Teuchos::CommandLineProcessor 
&clp, Xpetra::UnderlyingLib lib, int arg // manage parameters of the test case Galeri::Xpetra::Parameters matrixParameters(clp, 100, 100, 100, "Laplace2D"); // manage parameters of Xpetra - Xpetra::Parameters xpetraParameters(clp); + Xpetra::Parameters xpetraParameters(clp); // command line parameters - std::string xmlFileName = "stratimikos_ParameterList.xml"; clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); - std::string yamlFileName = ""; clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); - bool printTimings = false; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); - bool use_stacked_timer = false; clp.setOption("stacked-timer", "no-stacked-timer", &use_stacked_timer, "Run with or without stacked timer output"); - std::string timingsFormat = "table-fixed"; clp.setOption("time-format", &timingsFormat, "timings format (table-fixed | table-scientific | yaml)"); - bool binaryFormat = false; clp.setOption("binary", "ascii", &binaryFormat, "read matrices in binary format"); - std::string rowMapFile; clp.setOption("rowmap", &rowMapFile, "map data file"); - std::string colMapFile; clp.setOption("colmap", &colMapFile, "colmap data file"); - std::string domainMapFile; clp.setOption("domainmap", &domainMapFile, "domainmap data file"); - std::string rangeMapFile; clp.setOption("rangemap", &rangeMapFile, "rangemap data file"); - std::string matrixFile; clp.setOption("matrix", &matrixFile, "matrix data file"); - std::string rhsFile; clp.setOption("rhs", &rhsFile, "rhs data file"); - std::string coordFile; clp.setOption("coords", &coordFile, "coordinates data file"); - std::string coordMapFile; clp.setOption("coordsmap", &coordMapFile, "coordinates map data file"); - std::string nullFile; clp.setOption("nullspace", &nullFile, "nullspace data file"); - std::string materialFile; clp.setOption("material", &materialFile, "material data file"); - int numVectors = 1; clp.setOption("multivector", 
&numVectors, "number of rhs to solve simultaneously"); - int numSolves = 1; clp.setOption("numSolves", &numSolves, "number of times the system should be solved"); - - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + std::string xmlFileName = "stratimikos_ParameterList.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); + std::string yamlFileName = ""; + clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); + bool printTimings = false; + clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); + bool use_stacked_timer = false; + clp.setOption("stacked-timer", "no-stacked-timer", &use_stacked_timer, "Run with or without stacked timer output"); + std::string timingsFormat = "table-fixed"; + clp.setOption("time-format", &timingsFormat, "timings format (table-fixed | table-scientific | yaml)"); + bool binaryFormat = false; + clp.setOption("binary", "ascii", &binaryFormat, "read matrices in binary format"); + std::string rowMapFile; + clp.setOption("rowmap", &rowMapFile, "map data file"); + std::string colMapFile; + clp.setOption("colmap", &colMapFile, "colmap data file"); + std::string domainMapFile; + clp.setOption("domainmap", &domainMapFile, "domainmap data file"); + std::string rangeMapFile; + clp.setOption("rangemap", &rangeMapFile, "rangemap data file"); + std::string matrixFile; + clp.setOption("matrix", &matrixFile, "matrix data file"); + std::string rhsFile; + clp.setOption("rhs", &rhsFile, "rhs data file"); + std::string coordFile; + clp.setOption("coords", &coordFile, "coordinates data file"); + std::string coordMapFile; + clp.setOption("coordsmap", &coordMapFile, "coordinates map data file"); + std::string nullFile; + clp.setOption("nullspace", &nullFile, "nullspace data file"); + std::string materialFile; + clp.setOption("material", &materialFile, "material data file"); + int numVectors = 1; + clp.setOption("multivector", 
&numVectors, "number of rhs to solve simultaneously"); + int numSolves = 1; + clp.setOption("numSolves", &numSolves, "number of times the system should be solved"); + + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream &out = *fancy; out.setOutputToRootOnly(0); // Set up timers @@ -161,15 +177,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Construct the problem // - RCP A; - RCP map; + RCP A; + RCP map; RCP coordinates; - RCP nullspace; - RCP material; - RCP X, B; + RCP nullspace; + RCP material; + RCP X, B; std::ostringstream galeriStream; - MatrixLoad(comm,lib,binaryFormat,matrixFile,rhsFile,rowMapFile,colMapFile,domainMapFile,rangeMapFile,coordFile,coordMapFile,nullFile,materialFile,map,A,coordinates,nullspace,material,X,B,numVectors,matrixParameters,xpetraParameters,galeriStream); + MatrixLoad(comm, lib, binaryFormat, matrixFile, rhsFile, rowMapFile, colMapFile, domainMapFile, rangeMapFile, coordFile, coordMapFile, nullFile, materialFile, map, A, coordinates, nullspace, material, X, B, numVectors, matrixParameters, xpetraParameters, galeriStream); out << galeriStream.str(); X->putScalar(0); @@ -177,11 +193,11 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Build Thyra linear algebra objects // - RCP > xpCrsA = Teuchos::rcp_dynamic_cast >(A); + RCP > xpCrsA = Teuchos::rcp_dynamic_cast >(A); - RCP > thyraA = Xpetra::ThyraUtils::toThyra(xpCrsA->getCrsMatrix()); - RCP< Thyra::MultiVectorBase > thyraX = Teuchos::rcp_const_cast 
>(Xpetra::ThyraUtils::toThyraMultiVector(X)); - RCP > thyraB = Xpetra::ThyraUtils::toThyraMultiVector(B); + RCP > thyraA = Xpetra::ThyraUtils::toThyra(xpCrsA->getCrsMatrix()); + RCP > thyraX = Teuchos::rcp_const_cast >(Xpetra::ThyraUtils::toThyraMultiVector(X)); + RCP > thyraB = Xpetra::ThyraUtils::toThyraMultiVector(B); // // Build Stratimikos solver @@ -190,22 +206,22 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // This is the Stratimikos main class (= factory of solver factory). Stratimikos::LinearSolverBuilder linearSolverBuilder; // Register MueLu as a Stratimikos preconditioner strategy. - Stratimikos::enableMueLu(linearSolverBuilder); + Stratimikos::enableMueLu(linearSolverBuilder); // add coordinates and nullspace to parameter list if (paramList->isSublist("Preconditioner Types") && paramList->sublist("Preconditioner Types").isSublist("MueLu")) { - ParameterList& userParamList = paramList->sublist("Preconditioner Types").sublist("MueLu").sublist("user data"); - userParamList.set >("Coordinates", coordinates); - userParamList.set >("Nullspace", nullspace); - } + ParameterList &userParamList = paramList->sublist("Preconditioner Types").sublist("MueLu").sublist("user data"); + userParamList.set >("Coordinates", coordinates); + userParamList.set >("Nullspace", nullspace); + } // Setup solver parameters using a Stratimikos parameter list. linearSolverBuilder.setParameterList(paramList); // Build a new "solver factory" according to the previously specified parameter list. 
RCP > solverFactory = Thyra::createLinearSolveStrategy(linearSolverBuilder); - auto precFactory = solverFactory->getPreconditionerFactory(); + auto precFactory = solverFactory->getPreconditionerFactory(); RCP > prec; Teuchos::RCP > thyraInverseA; if (!precFactory.is_null()) { @@ -244,19 +260,21 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } else { RCP reportParams = rcp(new ParameterList); if (timingsFormat == "yaml") { - reportParams->set("Report format", "YAML"); // "Table" or "YAML" - reportParams->set("YAML style", "compact"); // "spacious" or "compact" + reportParams->set("Report format", "YAML"); // "Table" or "YAML" + reportParams->set("YAML style", "compact"); // "spacious" or "compact" } - reportParams->set("How to merge timer sets", "Union"); - reportParams->set("alwaysWriteLocal", false); - reportParams->set("writeGlobalStats", true); - reportParams->set("writeZeroTimers", false); + reportParams->set("How to merge timer sets", "Union"); + reportParams->set("alwaysWriteLocal", false); + reportParams->set("writeGlobalStats", true); + reportParams->set("writeZeroTimers", false); const std::string filter = ""; std::ios_base::fmtflags ff(out.flags()); - if (timingsFormat == "table-fixed") out << std::fixed; - else out << std::scientific; + if (timingsFormat == "table-fixed") + out << std::fixed; + else + out << std::scientific; TimeMonitor::report(comm.ptr(), out, filter, reportParams); out << std::setiosflags(ff); } @@ -264,18 +282,16 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg TimeMonitor::clearCounters(); out << std::endl; - } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } - //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/example/basic/Teko.cpp b/packages/muelu/example/basic/Teko.cpp index a2c75895118a..29ab168ef2d6 100644 --- a/packages/muelu/example/basic/Teko.cpp +++ b/packages/muelu/example/basic/Teko.cpp @@ -75,12 +75,11 @@ #include "Stratimikos_DefaultLinearSolverBuilder.hpp" #include "Stratimikos_MueLuHelpers.hpp" - // Belos includes #include "BelosConfigDefs.hpp" #include "BelosLinearProblem.hpp" #include "BelosBlockGmresSolMgr.hpp" -#include "BelosThyraAdapter.hpp" // Requires Stratimikos... +#include "BelosThyraAdapter.hpp" // Requires Stratimikos... #include /*@ \label{lned:end-includes} @*/ @@ -91,80 +90,81 @@ using Teuchos::rcpFromRef; RCP buildLibPL(); -template -void GenerateDefault_2x2_Splitting(const RCP >& crsMat, - Teko::LinearOp & A, - Teko::MultiVector & x, - Teko::MultiVector & b) -{ - typedef Tpetra::Vector TP_Vec; - typedef Tpetra::CrsMatrix TP_Crs; - typedef Tpetra::Operator TP_Op; - RCP zeroCrsMat = rcp(new TP_Crs(*crsMat, Teuchos::Copy)); - zeroCrsMat->setAllToScalar(0.0); - - RCP Mat = crsMat; - RCP zeroMat = zeroCrsMat; - - // Allocate some right handside vectors - RCP x0_tp = rcp(new TP_Vec(Mat->getDomainMap())); - RCP x1_tp = rcp(new TP_Vec(Mat->getDomainMap())); - RCP b0_tp = rcp(new TP_Vec(Mat->getRangeMap())); - RCP b1_tp = rcp(new TP_Vec(Mat->getRangeMap())); - b0_tp->randomize(); - b1_tp->randomize(); - - RCP > domain = Thyra::tpetraVectorSpace(Mat->getDomainMap()); - RCP > range = Thyra::tpetraVectorSpace(Mat->getRangeMap()); - - ///////////////////////////////////////////////////////// - // Build Teko compatible matrices and vectors - ///////////////////////////////////////////////////////// - - // convert them to teko compatible sub 
vectors - Teko::MultiVector x0_th = Thyra::tpetraVector(domain, x0_tp); - Teko::MultiVector x1_th = Thyra::tpetraVector(domain, x1_tp); - Teko::MultiVector b0_th = Thyra::tpetraVector( range, b0_tp); - Teko::MultiVector b1_th = Thyra::tpetraVector( range, b1_tp); - std::vector x_vec; x_vec.push_back(x0_th); x_vec.push_back(x1_th); - std::vector b_vec; b_vec.push_back(b0_th); b_vec.push_back(b1_th); - - x = Teko::buildBlockedMultiVector(x_vec); // these will be used in the Teko solve - b = Teko::buildBlockedMultiVector(b_vec); - - // Build the Teko compatible linear system - Teko::LinearOp thMat = Thyra::tpetraLinearOp(range,domain,Mat); - Teko::LinearOp thZero = Thyra::tpetraLinearOp(range,domain,zeroMat); - A = Thyra::block2x2(thMat,thZero,thZero,thMat); // build an upper triangular 2x2 -} +template +void GenerateDefault_2x2_Splitting(const RCP > &crsMat, + Teko::LinearOp &A, + Teko::MultiVector &x, + Teko::MultiVector &b) { + typedef Tpetra::Vector TP_Vec; + typedef Tpetra::CrsMatrix TP_Crs; + typedef Tpetra::Operator TP_Op; + RCP zeroCrsMat = rcp(new TP_Crs(*crsMat, Teuchos::Copy)); + zeroCrsMat->setAllToScalar(0.0); + + RCP Mat = crsMat; + RCP zeroMat = zeroCrsMat; + + // Allocate some right handside vectors + RCP x0_tp = rcp(new TP_Vec(Mat->getDomainMap())); + RCP x1_tp = rcp(new TP_Vec(Mat->getDomainMap())); + RCP b0_tp = rcp(new TP_Vec(Mat->getRangeMap())); + RCP b1_tp = rcp(new TP_Vec(Mat->getRangeMap())); + b0_tp->randomize(); + b1_tp->randomize(); + + RCP > domain = Thyra::tpetraVectorSpace(Mat->getDomainMap()); + RCP > range = Thyra::tpetraVectorSpace(Mat->getRangeMap()); + ///////////////////////////////////////////////////////// + // Build Teko compatible matrices and vectors + ///////////////////////////////////////////////////////// -int extract_int(std::istringstream & iss) { + // convert them to teko compatible sub vectors + Teko::MultiVector x0_th = Thyra::tpetraVector(domain, x0_tp); + Teko::MultiVector x1_th = Thyra::tpetraVector(domain, x1_tp); 
+ Teko::MultiVector b0_th = Thyra::tpetraVector(range, b0_tp); + Teko::MultiVector b1_th = Thyra::tpetraVector(range, b1_tp); + std::vector x_vec; + x_vec.push_back(x0_th); + x_vec.push_back(x1_th); + std::vector b_vec; + b_vec.push_back(b0_th); + b_vec.push_back(b1_th); + + x = Teko::buildBlockedMultiVector(x_vec); // these will be used in the Teko solve + b = Teko::buildBlockedMultiVector(b_vec); + + // Build the Teko compatible linear system + Teko::LinearOp thMat = Thyra::tpetraLinearOp(range, domain, Mat); + Teko::LinearOp thZero = Thyra::tpetraLinearOp(range, domain, zeroMat); + A = Thyra::block2x2(thMat, thZero, thZero, thMat); // build an upper triangular 2x2 +} + +int extract_int(std::istringstream &iss) { std::string s; iss >> s; - if(s != "") { - return stoi(s); } - else + if (s != "") { + return stoi(s); + } else return -1; } - // Generates proc-by-proc block gid lists -template -std::vector > read_block_gids(std::string partitionFile, RCP > & rowMap) { - using V = Tpetra::Vector; - using CRS = Tpetra::CrsMatrix; +template +std::vector > read_block_gids(std::string partitionFile, RCP > &rowMap) { + using V = Tpetra::Vector; + using CRS = Tpetra::CrsMatrix; - RCP pfile = Tpetra::MatrixMarket::Reader::readVectorFile(partitionFile,rowMap->getComm(),rowMap); + RCP pfile = Tpetra::MatrixMarket::Reader::readVectorFile(partitionFile, rowMap->getComm(), rowMap); int num_blocks = 1 + pfile->normInf(); - if(!rowMap->getComm()->getRank()) - std::cout<<"Reading partition file: Found "<getComm()->getRank()) + std::cout << "Reading partition file: Found " << num_blocks << " blocks" << std::endl; std::vector > block_gids(num_blocks); auto vv = pfile->get1dView(); - for(LO i=0; i<(LO)vv.size(); i++){ + for (LO i = 0; i < (LO)vv.size(); i++) { LO block_id = vv[i]; block_gids[block_id].push_back(rowMap->getGlobalElement(i)); } @@ -172,314 +172,296 @@ std::vector > read_block_gids(std::string partitionFile, RCP -void ReadSplittingFromDisk(const std::string & 
partitionFile, - const RCP >& crsMat, - RCP< Tpetra::Operator > & A) { +template +void ReadSplittingFromDisk(const std::string &partitionFile, + const RCP > &crsMat, + RCP > &A) { // The format for the partition file is a number of lines of the form: // proc_id block_id gid0, gid1,... // // Blocks do not need to be uniquely owned by a processor. // Each rank is going to read the file, one at a time (to avoid hammering on the disk) - auto comm = crsMat->getRowMap()->getComm(); - RCP > rowmap = crsMat->getRowMap(); - std::vector > my_blocks_and_gids = read_block_gids(partitionFile,rowmap); + auto comm = crsMat->getRowMap()->getComm(); + RCP > rowmap = crsMat->getRowMap(); + std::vector > my_blocks_and_gids = read_block_gids(partitionFile, rowmap); - RCP rA = rcp( new Teko::TpetraHelpers::BlockedTpetraOperator(my_blocks_and_gids,crsMat)); + RCP rA = rcp(new Teko::TpetraHelpers::BlockedTpetraOperator(my_blocks_and_gids, crsMat)); A = rA; } +template +int solve_thyra(RCP > &crsMat, const std::string &xmlFile) { + typedef Tpetra::CrsMatrix TP_Crs; -template - int solve_thyra(RCP > & crsMat, const std::string &xmlFile) { - typedef Tpetra::CrsMatrix TP_Crs; - - typedef Thyra::MultiVectorBase MV; - typedef Thyra::LinearOpBase OP; + typedef Thyra::MultiVectorBase MV; + typedef Thyra::LinearOpBase OP; auto comm = crsMat->getRowMap()->getComm(); RCP linearSolverBuilder = Teuchos::rcp(new Stratimikos::DefaultLinearSolverBuilder); - + ///////////////////////////////////////////////////////// // Build the Thyra operators ///////////////////////////////////////////////////////// - Teko::LinearOp A; - Teko::MultiVector x,b; - GenerateDefault_2x2_Splitting(crsMat,A,x,b); - - - ///////////////////////////////////////////////////////// - // Build the preconditioner - ///////////////////////////////////////////////////////// - - // build an InverseLibrary - RCP invLib; - RCP inverse; - if(xmlFile == "") { - invLib = 
Teko::InverseLibrary::buildFromParameterList(*buildLibPL(),linearSolverBuilder); - inverse = invLib->getInverseFactory("Gauss-Seidel"); - } - else { - Teuchos::ParameterList xmlList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr(&xmlList),*comm); - invLib = Teko::InverseLibrary::buildFromParameterList(xmlList,linearSolverBuilder); - inverse = invLib->getInverseFactory("MyTekoPreconditioner"); - } - // build the inverse factory needed by the example preconditioner - - - // build the preconditioner from the jacobian - Teko::LinearOp prec = Teko::buildInverse(*inverse,A); - - // Setup the Belos solver - ///////////////////////////////////////////////////////// - - Teuchos::ParameterList belosList; - belosList.set( "Num Blocks", 200 ); // Maximum number of blocks in Krylov factorization - belosList.set( "Block Size",1 ); // Blocksize to be used by iterative solver - belosList.set( "Maximum Iterations", 200 ); // Maximum number of iterations allowed - belosList.set( "Maximum Restarts", 1 ); // Maximum number of restarts allowed - belosList.set( "Convergence Tolerance", 1e-5 ); // Relative convergence tolerance requested - belosList.set( "Verbosity", 33);//Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails ); - belosList.set( "Output Frequency", 1 ); - belosList.set( "Output Style", 1 ); - - RCP > problem = rcp(new Belos::LinearProblem( A, x, b ) ); - problem->setLeftPrec(prec); - problem->setProblem(); // should check the return type!!! - - RCP > solver - = rcp(new Belos::BlockGmresSolMgr(problem, rcpFromRef(belosList))); - - // - // Perform solve - // - Belos::ReturnType ret = solver->solve(); - - if (ret!=Belos::Converged) { - std::cout << std::endl << "ERROR: Belos did not converge!" 
<< std::endl; - return -1; - } - - return 0; -} + Teko::LinearOp A; + Teko::MultiVector x, b; + GenerateDefault_2x2_Splitting(crsMat, A, x, b); + + ///////////////////////////////////////////////////////// + // Build the preconditioner + ///////////////////////////////////////////////////////// + + // build an InverseLibrary + RCP invLib; + RCP inverse; + if (xmlFile == "") { + invLib = Teko::InverseLibrary::buildFromParameterList(*buildLibPL(), linearSolverBuilder); + inverse = invLib->getInverseFactory("Gauss-Seidel"); + } else { + Teuchos::ParameterList xmlList; + Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr(&xmlList), *comm); + invLib = Teko::InverseLibrary::buildFromParameterList(xmlList, linearSolverBuilder); + inverse = invLib->getInverseFactory("MyTekoPreconditioner"); + } + // build the inverse factory needed by the example preconditioner + // build the preconditioner from the jacobian + Teko::LinearOp prec = Teko::buildInverse(*inverse, A); + + // Setup the Belos solver + ///////////////////////////////////////////////////////// + Teuchos::ParameterList belosList; + belosList.set("Num Blocks", 200); // Maximum number of blocks in Krylov factorization + belosList.set("Block Size", 1); // Blocksize to be used by iterative solver + belosList.set("Maximum Iterations", 200); // Maximum number of iterations allowed + belosList.set("Maximum Restarts", 1); // Maximum number of restarts allowed + belosList.set("Convergence Tolerance", 1e-5); // Relative convergence tolerance requested + belosList.set("Verbosity", 33); // Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails ); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", 1); + RCP > problem = rcp(new Belos::LinearProblem(A, x, b)); + problem->setLeftPrec(prec); + problem->setProblem(); // should check the return type!!! 
-template - int solve_tpetra(RCP > & crsMat, RCP > & b, RCP > & coords, const std::string &xmlFile,const std::string &partitionFile) { - //typedef Tpetra::Map<> TP_Map; - //typedef Tpetra::Vector TP_Vec; - typedef Tpetra::CrsMatrix TP_Crs; - typedef Thyra::PreconditionerFactoryBase Base; + RCP > solver = rcp(new Belos::BlockGmresSolMgr(problem, rcpFromRef(belosList))); - typedef Thyra::MultiVectorBase MV; - typedef Thyra::LinearOpBase OP; + // + // Perform solve + // + Belos::ReturnType ret = solver->solve(); + + if (ret != Belos::Converged) { + std::cout << std::endl + << "ERROR: Belos did not converge!" << std::endl; + return -1; + } + + return 0; +} + +template +int solve_tpetra(RCP > &crsMat, RCP > &b, RCP > &coords, const std::string &xmlFile, const std::string &partitionFile) { + // typedef Tpetra::Map<> TP_Map; + // typedef Tpetra::Vector TP_Vec; + typedef Tpetra::CrsMatrix TP_Crs; + typedef Thyra::PreconditionerFactoryBase Base; + + typedef Thyra::MultiVectorBase MV; + typedef Thyra::LinearOpBase OP; // typedef Tpetra::Vector MV; -// typedef Tpetra::Operator OP; + // typedef Tpetra::Operator OP; auto comm = crsMat->getRowMap()->getComm(); // tell Stratimikos => Teko about MueLu RCP linearSolverBuilder = Teuchos::rcp(new Stratimikos::DefaultLinearSolverBuilder); - Stratimikos::enableMueLu(*linearSolverBuilder); - + Stratimikos::enableMueLu(*linearSolverBuilder); + ///////////////////////////////////////////////////////// // Build the Thyra operators ///////////////////////////////////////////////////////// - RCP > A; - RCP > x = rcp(new Tpetra::Vector(b->getMap())); - ReadSplittingFromDisk(partitionFile,crsMat,A); + RCP > A; + RCP > x = rcp(new Tpetra::Vector(b->getMap())); + ReadSplittingFromDisk(partitionFile, crsMat, A); x->putScalar(Teuchos::ScalarTraits::zero()); - + RCP > xt = Thyra::createVector(x); RCP > bt = Thyra::createVector(b); + ///////////////////////////////////////////////////////// + // Build the preconditioner + 
///////////////////////////////////////////////////////// + + // build an InverseLibrary + RCP invLib; + RCP inverse; + if (xmlFile == "") { + invLib = Teko::InverseLibrary::buildFromParameterList(*buildLibPL(), linearSolverBuilder); + inverse = invLib->getInverseFactory("Gauss-Seidel"); + } else { + Teuchos::ParameterList xmlList; + Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr(&xmlList), *comm); + + // Add coordinates if we need to + if (!coords.is_null()) { + // FIXME: Please do this more generally + auto &sublist = xmlList.sublist("MueluScalar").sublist("user data"); + sublist.set("Coordinates", Xpetra::toXpetra(coords)); + } + + invLib = Teko::InverseLibrary::buildFromParameterList(xmlList, linearSolverBuilder); + inverse = invLib->getInverseFactory("MyTekoPreconditioner"); + } - ///////////////////////////////////////////////////////// - // Build the preconditioner - ///////////////////////////////////////////////////////// - - // build an InverseLibrary - RCP invLib; - RCP inverse; - if(xmlFile == "") { - invLib = Teko::InverseLibrary::buildFromParameterList(*buildLibPL(),linearSolverBuilder); - inverse = invLib->getInverseFactory("Gauss-Seidel"); - } - else { - Teuchos::ParameterList xmlList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr(&xmlList),*comm); - - // Add coordinates if we need to - if(!coords.is_null()){ - // FIXME: Please do this more generally - auto & sublist = xmlList.sublist("MueluScalar").sublist("user data"); - sublist.set("Coordinates",Xpetra::toXpetra(coords)); - } - - invLib = Teko::InverseLibrary::buildFromParameterList(xmlList,linearSolverBuilder); - inverse = invLib->getInverseFactory("MyTekoPreconditioner"); - } - - // build the inverse factory needed by the example preconditioner - Teko::TpetraHelpers::InverseFactoryOperator ifo(inverse); - ifo.buildInverseOperator(A); - - - // build the preconditioner from the jacobian - Teko::LinearOp At = 
Thyra::tpetraLinearOp(Thyra::tpetraVectorSpace(A->getRangeMap()),Thyra::tpetraVectorSpace(A->getDomainMap()),A); - - - Teko::LinearOp prec = Teko::buildInverse(*inverse,At); - - // Setup the Belos solver - ///////////////////////////////////////////////////////// - - Teuchos::ParameterList belosList; - belosList.set( "Num Blocks", 200 ); // Maximum number of blocks in Krylov factorization - belosList.set( "Block Size",1 ); // Blocksize to be used by iterative solver - belosList.set( "Maximum Iterations", 200 ); // Maximum number of iterations allowed - belosList.set( "Maximum Restarts", 1 ); // Maximum number of restarts allowed - belosList.set( "Convergence Tolerance", 1e-5 ); // Relative convergence tolerance requested - belosList.set( "Verbosity", 33);//Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails ); - belosList.set( "Output Frequency", 1 ); - belosList.set( "Output Style", 1 ); - - RCP > problem = rcp(new Belos::LinearProblem( At, xt, bt ) ); - problem->setLeftPrec(prec); - problem->setProblem(); // should check the return type!!! - - RCP > solver - = rcp(new Belos::BlockGmresSolMgr(problem, rcpFromRef(belosList))); - - // - // Perform solve - // - Belos::ReturnType ret = solver->solve(); - - if (ret!=Belos::Converged) { - std::cout << std::endl << "ERROR: Belos did not converge!" 
<< std::endl; - return -1; - } - - return 0; -} + // build the inverse factory needed by the example preconditioner + Teko::TpetraHelpers::InverseFactoryOperator ifo(inverse); + ifo.buildInverseOperator(A); + // build the preconditioner from the jacobian + Teko::LinearOp At = Thyra::tpetraLinearOp(Thyra::tpetraVectorSpace(A->getRangeMap()), Thyra::tpetraVectorSpace(A->getDomainMap()), A); -int main(int argc,char * argv[]) -{ - typedef double SC; - - typedef Tpetra::Map<> TP_Map; - typedef Tpetra::Vector TP_Vec; - typedef Tpetra::MultiVector TP_MV; - typedef Tpetra::CrsMatrix TP_Crs; - //typedef Tpetra::Operator TP_Op; - - typedef TP_Vec::local_ordinal_type LO; - typedef TP_Vec::global_ordinal_type GO; - typedef TP_Vec::node_type NO; - - - // calls MPI_Init and MPI_Finalize - Teuchos::GlobalMPISession mpiSession(&argc,&argv); - - // Parse CLI options - Teuchos::CommandLineProcessor clp(false); - std::string rhsFile; clp.setOption("rhs", &rhsFile, "rhs data file"); - - std::string rowMapFile; clp.setOption("rowmap", &rowMapFile, "map data file"); - std::string xmlFile = ""; clp.setOption("xml", &xmlFile, "read Tekko parameters from an xml file"); - std::string matrixFile = "../data/nsjac.mm"; clp.setOption("matrix", &matrixFile, "matrix data file"); - - std::string partitionFile = ""; clp.setOption("partition", &partitionFile, "partition file which defines the blocks"); - - - std::string coordsFile; clp.setOption("coords", &coordsFile, "coords data file"); - - std::string coordsMapFile; clp.setOption("coordsmap", &coordsMapFile, "coords data file"); - - clp.recogniseAllOptions(true); - switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; - case Teuchos::CommandLineProcessor::PARSE_ERROR: - case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; - } - - ///////////////////////////////////////////////////////// - // Build 
the Tpetra matrices and vectors - ///////////////////////////////////////////////////////// - - // read in the CRS matrix - RCP crsMat; - RCP rhs; - RCP coords; - if(rowMapFile == "") { - crsMat = Tpetra::MatrixMarket::Reader::readSparseFile(matrixFile, Tpetra::getDefaultComm()); - } - else { - RCP rowMap = Tpetra::MatrixMarket::Reader::readMapFile(rowMapFile,Tpetra::getDefaultComm()); - RCP colMap; - crsMat = Tpetra::MatrixMarket::Reader::readSparseFile(matrixFile,rowMap,colMap,rowMap,rowMap); - - if(rhsFile!= "") { - rhs = Tpetra::MatrixMarket::Reader::readVectorFile(rhsFile,rowMap->getComm(),rowMap); - } - - if(coordsFile!= "") { - RCP coordsMap = rowMap; - if(coordsMapFile != "") - coordsMap = Tpetra::MatrixMarket::Reader::readMapFile(coordsMapFile,rowMap->getComm()); - coords = Tpetra::MatrixMarket::Reader::readDenseFile(coordsFile,coordsMap->getComm(),coordsMap); - } - } - - // Sanity checks - if(rhs.is_null()) throw std::runtime_error("rhs is null"); - if(crsMat.is_null()) throw std::runtime_error("crsMat is null"); - - - auto comm = crsMat->getRowMap()->getComm(); - - - ///////////////////////////////////////////////////////// - // Build the Thyra operators - ///////////////////////////////////////////////////////// - Teko::LinearOp A; - Teko::MultiVector x,b; - - int rv; - if(partitionFile == "") - rv = solve_thyra(crsMat,xmlFile); - else - rv = solve_tpetra(crsMat,rhs,coords,xmlFile,partitionFile); - - return rv; + Teko::LinearOp prec = Teko::buildInverse(*inverse, At); + // Setup the Belos solver + ///////////////////////////////////////////////////////// + Teuchos::ParameterList belosList; + belosList.set("Num Blocks", 200); // Maximum number of blocks in Krylov factorization + belosList.set("Block Size", 1); // Blocksize to be used by iterative solver + belosList.set("Maximum Iterations", 200); // Maximum number of iterations allowed + belosList.set("Maximum Restarts", 1); // Maximum number of restarts allowed + belosList.set("Convergence Tolerance", 
1e-5); // Relative convergence tolerance requested + belosList.set("Verbosity", 33); // Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails ); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", 1); + + RCP > problem = rcp(new Belos::LinearProblem(At, xt, bt)); + problem->setLeftPrec(prec); + problem->setProblem(); // should check the return type!!! + + RCP > solver = rcp(new Belos::BlockGmresSolMgr(problem, rcpFromRef(belosList))); + + // + // Perform solve + // + Belos::ReturnType ret = solver->solve(); + + if (ret != Belos::Converged) { + std::cout << std::endl + << "ERROR: Belos did not converge!" << std::endl; + return -1; + } + + return 0; } -RCP buildLibPL() -{ - RCP pl = rcp(new Teuchos::ParameterList()); - - { - Teuchos::ParameterList & sub_jac = pl->sublist("Jacobi"); - sub_jac.set("Type","Block Jacobi"); - sub_jac.set("Inverse Type","Ifpack2"); - - Teuchos::ParameterList & sub_gs = pl->sublist("Gauss-Seidel"); - sub_gs.set("Type","Block Gauss-Seidel"); - sub_gs.set("Use Upper Triangle",true); - sub_gs.set("Inverse Type","Ifpack2"); - } - return pl; +int main(int argc, char *argv[]) { + typedef double SC; + + typedef Tpetra::Map<> TP_Map; + typedef Tpetra::Vector TP_Vec; + typedef Tpetra::MultiVector TP_MV; + typedef Tpetra::CrsMatrix TP_Crs; + // typedef Tpetra::Operator TP_Op; + + typedef TP_Vec::local_ordinal_type LO; + typedef TP_Vec::global_ordinal_type GO; + typedef TP_Vec::node_type NO; + + // calls MPI_Init and MPI_Finalize + Teuchos::GlobalMPISession mpiSession(&argc, &argv); + + // Parse CLI options + Teuchos::CommandLineProcessor clp(false); + std::string rhsFile; + clp.setOption("rhs", &rhsFile, "rhs data file"); + + std::string rowMapFile; + clp.setOption("rowmap", &rowMapFile, "map data file"); + std::string xmlFile = ""; + clp.setOption("xml", &xmlFile, "read Tekko parameters from an xml file"); + std::string matrixFile = "../data/nsjac.mm"; + clp.setOption("matrix", &matrixFile, "matrix 
data file"); + + std::string partitionFile = ""; + clp.setOption("partition", &partitionFile, "partition file which defines the blocks"); + + std::string coordsFile; + clp.setOption("coords", &coordsFile, "coords data file"); + + std::string coordsMapFile; + clp.setOption("coordsmap", &coordsMapFile, "coords data file"); + + clp.recogniseAllOptions(true); + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_ERROR: + case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + } + + ///////////////////////////////////////////////////////// + // Build the Tpetra matrices and vectors + ///////////////////////////////////////////////////////// + + // read in the CRS matrix + RCP crsMat; + RCP rhs; + RCP coords; + if (rowMapFile == "") { + crsMat = Tpetra::MatrixMarket::Reader::readSparseFile(matrixFile, Tpetra::getDefaultComm()); + } else { + RCP rowMap = Tpetra::MatrixMarket::Reader::readMapFile(rowMapFile, Tpetra::getDefaultComm()); + RCP colMap; + crsMat = Tpetra::MatrixMarket::Reader::readSparseFile(matrixFile, rowMap, colMap, rowMap, rowMap); + + if (rhsFile != "") { + rhs = Tpetra::MatrixMarket::Reader::readVectorFile(rhsFile, rowMap->getComm(), rowMap); + } + + if (coordsFile != "") { + RCP coordsMap = rowMap; + if (coordsMapFile != "") + coordsMap = Tpetra::MatrixMarket::Reader::readMapFile(coordsMapFile, rowMap->getComm()); + coords = Tpetra::MatrixMarket::Reader::readDenseFile(coordsFile, coordsMap->getComm(), coordsMap); + } + } + + // Sanity checks + if (rhs.is_null()) throw std::runtime_error("rhs is null"); + if (crsMat.is_null()) throw std::runtime_error("crsMat is null"); + + auto comm = crsMat->getRowMap()->getComm(); + + ///////////////////////////////////////////////////////// + // Build the Thyra operators + 
///////////////////////////////////////////////////////// + Teko::LinearOp A; + Teko::MultiVector x, b; + + int rv; + if (partitionFile == "") + rv = solve_thyra(crsMat, xmlFile); + else + rv = solve_tpetra(crsMat, rhs, coords, xmlFile, partitionFile); + + return rv; } +RCP buildLibPL() { + RCP pl = rcp(new Teuchos::ParameterList()); + + { + Teuchos::ParameterList &sub_jac = pl->sublist("Jacobi"); + sub_jac.set("Type", "Block Jacobi"); + sub_jac.set("Inverse Type", "Ifpack2"); + + Teuchos::ParameterList &sub_gs = pl->sublist("Gauss-Seidel"); + sub_gs.set("Type", "Block Gauss-Seidel"); + sub_gs.set("Use Upper Triangle", true); + sub_gs.set("Inverse Type", "Ifpack2"); + } + return pl; +} diff --git a/packages/muelu/matlab/bin/muemex.cpp b/packages/muelu/matlab/bin/muemex.cpp index e167c16d6201..b75100351ecc 100644 --- a/packages/muelu/matlab/bin/muemex.cpp +++ b/packages/muelu/matlab/bin/muemex.cpp @@ -50,7 +50,7 @@ #define IS_TRUE 1 #define MUEMEX_ERROR -1 -//Do not compile MueMex if any of these aren't available +// Do not compile MueMex if any of these aren't available #if !defined HAVE_MUELU_EPETRA || !defined HAVE_MUELU_MATLAB #error "MueMex requires Epetra, Tpetra and MATLAB." #endif @@ -77,142 +77,134 @@ extern void _main(); #define MUEMEX_DEFAULT_NUMPDES 1 #define MUEMEX_DEFAULT_ADAPTIVEVECS 0 #define MUEMEX_DEFAULT_USEDEFAULTNS true -#define MMABS(x) ((x)>0?(x):(-(x))) -#define MMISINT(x) ((x)==0?(((x-(int)(x))<1e-15)?true:false):(((x-(int)(x))<1e-15*MMABS(x))?true:false)) +#define MMABS(x) ((x) > 0 ? (x) : (-(x))) +#define MMISINT(x) ((x) == 0 ? (((x - (int)(x)) < 1e-15) ? true : false) : (((x - (int)(x)) < 1e-15 * MMABS(x)) ? 
true : false)) /* Debugging */ //#define VERBOSE_OUTPUT namespace MueLu { -//Need subclass of Hierarchy that gives public access to list of FactoryManagers -template -class OpenHierarchy : public Hierarchy -{ - public: - const RCP& GetFactoryManager(const int levelID) const; +// Need subclass of Hierarchy that gives public access to list of FactoryManagers +template +class OpenHierarchy : public Hierarchy { + public: + const RCP& GetFactoryManager(const int levelID) const; }; -template -const RCP& OpenHierarchy::GetFactoryManager(const int levelID) const -{ +template +const RCP& OpenHierarchy::GetFactoryManager(const int levelID) const { TEUCHOS_TEST_FOR_EXCEPTION(levelID < 0 || levelID > this->GetNumLevels(), Exceptions::RuntimeError, - "MueLu::Hierarchy::GetFactoryManager(): invalid input parameter value: LevelID = " << levelID); + "MueLu::Hierarchy::GetFactoryManager(): invalid input parameter value: LevelID = " << levelID); return this->GetLevelManager(levelID); } -//Declare and call default constructor for data_pack_list vector (starts empty) +// Declare and call default constructor for data_pack_list vector (starts empty) vector> MuemexSystemList::list; int MuemexSystemList::nextID = 0; -//Need a global flag to keep track of Epetra vs. Tpetra for constructing multivectors for param lists +// Need a global flag to keep track of Epetra vs. 
Tpetra for constructing multivectors for param lists bool useEpetra = false; -//Parse a string to get each bit of Belos verbosity -int strToMsgType(const char* str) -{ - if(str == NULL) +// Parse a string to get each bit of Belos verbosity +int strToMsgType(const char* str) { + if (str == NULL) return Belos::Errors; - else if(strstr(str, "Warnings") != NULL) + else if (strstr(str, "Warnings") != NULL) return Belos::Warnings; - else if(strstr(str, "IterationDetails") != NULL) + else if (strstr(str, "IterationDetails") != NULL) return Belos::IterationDetails; - else if(strstr(str, "OrthoDetails") != NULL) + else if (strstr(str, "OrthoDetails") != NULL) return Belos::OrthoDetails; - else if(strstr(str, "FinalSummary") != NULL) + else if (strstr(str, "FinalSummary") != NULL) return Belos::FinalSummary; - else if(strstr(str, "TimingDetails") != NULL) + else if (strstr(str, "TimingDetails") != NULL) return Belos::TimingDetails; - else if(strstr(str, "StatusTestDetails") != NULL) + else if (strstr(str, "StatusTestDetails") != NULL) return Belos::StatusTestDetails; - else if(strstr(str, "Debug") != NULL) + else if (strstr(str, "Debug") != NULL) return Belos::Debug; - //This has no effect when added/OR'd with flags + // This has no effect when added/OR'd with flags return Belos::Errors; } -MuemexType strToDataType(const char* str, char* typeName, bool complexFlag = false) -{ +MuemexType strToDataType(const char* str, char* typeName, bool complexFlag = false) { std::string temp(str); std::string myStr = trim(temp); MuemexType matrixType, multivectorType, scalarType; - if(!complexFlag) - { - matrixType = XPETRA_MATRIX_DOUBLE; + if (!complexFlag) { + matrixType = XPETRA_MATRIX_DOUBLE; multivectorType = XPETRA_MULTIVECTOR_DOUBLE; - scalarType = DOUBLE; - } - else - { - matrixType = XPETRA_MATRIX_COMPLEX; + scalarType = DOUBLE; + } else { + matrixType = XPETRA_MATRIX_COMPLEX; multivectorType = XPETRA_MULTIVECTOR_COMPLEX; - scalarType = COMPLEX; + scalarType = COMPLEX; } size_t 
npos = string::npos; - if(myStr == "A" || - myStr == "P" || myStr == "Ptent" || - myStr == "R") + if (myStr == "A" || + myStr == "P" || myStr == "Ptent" || + myStr == "R") return matrixType; - if(myStr == "Nullspace") + if (myStr == "Nullspace") return multivectorType; - if(myStr == "Aggregates") + if (myStr == "Aggregates") return AGGREGATES; - if(myStr == "Graph") + if (myStr == "Graph") return GRAPH; - if(myStr == "Coordinates") + if (myStr == "Coordinates") return XPETRA_MULTIVECTOR_DOUBLE; #ifdef HAVE_MUELU_INTREPID2 - if(myStr == "pcoarsen: element to node map") + if (myStr == "pcoarsen: element to node map") return FIELDCONTAINER_ORDINAL; #endif - //Check for custom variable + // Check for custom variable size_t firstWordStart = myStr.find_first_not_of(' '); size_t firstWordEnd = myStr.find(' ', firstWordStart); std::string firstWord = myStr.substr(firstWordStart, firstWordEnd - firstWordStart); - if(firstWord.length() > 0) { - temp = myStr.substr(firstWordEnd,myStr.length()-firstWordEnd); + if (firstWord.length() > 0) { + temp = myStr.substr(firstWordEnd, myStr.length() - firstWordEnd); std::string secondWord = trim(temp); - if(secondWord.length() > 0) { - //make first word lowercase + if (secondWord.length() > 0) { + // make first word lowercase std::transform(firstWord.begin(), firstWord.end(), firstWord.begin(), ::tolower); - //compare first word with possible values - if(firstWord.find("matrix") != npos) + // compare first word with possible values + if (firstWord.find("matrix") != npos) return matrixType; - if(firstWord.find("multivector") != npos) + if (firstWord.find("multivector") != npos) return multivectorType; - if(firstWord.find("map") != npos) + if (firstWord.find("map") != npos) return XPETRA_MAP; - if(firstWord.find("ordinalvector") != npos) + if (firstWord.find("ordinalvector") != npos) return XPETRA_ORDINAL_VECTOR; - if(firstWord.find("int") != npos) + if (firstWord.find("int") != npos) return INT; - if(firstWord.find("scalar") != npos) + 
if (firstWord.find("scalar") != npos) return scalarType; - if(firstWord.find("double") != npos) + if (firstWord.find("double") != npos) return DOUBLE; - if(firstWord.find("complex") != npos) + if (firstWord.find("complex") != npos) return COMPLEX; } } - if(typeName) - { + if (typeName) { std::string typeString(typeName); std::transform(typeString.begin(), typeString.end(), typeString.begin(), ::tolower); - if(typeString.find("matrix") != npos) + if (typeString.find("matrix") != npos) return matrixType; - if(typeString.find("multivector") != npos) + if (typeString.find("multivector") != npos) return multivectorType; - if(typeString.find("map") != npos) + if (typeString.find("map") != npos) return XPETRA_MAP; - if(typeString.find("ordinalvector") != npos) + if (typeString.find("ordinalvector") != npos) return XPETRA_ORDINAL_VECTOR; - if(typeString.find("int") != npos) + if (typeString.find("int") != npos) return INT; - if(typeString.find("scalar") != npos) + if (typeString.find("scalar") != npos) return scalarType; - if(typeString.find("double") != npos) + if (typeString.find("double") != npos) return DOUBLE; - if(typeString.find("complex") != npos) + if (typeString.find("complex") != npos) return COMPLEX; string errMsg = typeString + " is not a valid type."; throw runtime_error(errMsg); @@ -220,53 +212,48 @@ MuemexType strToDataType(const char* str, char* typeName, bool complexFlag = fal throw runtime_error("Could not determine type of data."); } -//Parse a string to get Belos output style (Brief is default) -int strToOutputStyle(const char* str) -{ - if(strstr("General", str) != NULL) +// Parse a string to get Belos output style (Brief is default) +int strToOutputStyle(const char* str) { + if (strstr("General", str) != NULL) return Belos::General; else return Belos::Brief; } -//Get Belos "Verbosity" setting for its ParameterList -int getBelosVerbosity(const char* input) -{ +// Get Belos "Verbosity" setting for its ParameterList +int getBelosVerbosity(const char* 
input) { int result = 0; - char* str = (char*) input; + char* str = (char*)input; char* pch; pch = strtok(str, " +,"); - if(pch == NULL) + if (pch == NULL) return result; result |= strToMsgType(pch); - while(pch != NULL) - { - pch = strtok(NULL, " +,"); - if(pch == NULL) - return result; - result |= strToMsgType(pch); - } + while (pch != NULL) { + pch = strtok(NULL, " +,"); + if (pch == NULL) + return result; + result |= strToMsgType(pch); + } return result; } -template -mxArray* TpetraSystem::solve(RCP params, RCP> matrix, const mxArray* b, int& iters) -{ +template +mxArray* TpetraSystem::solve(RCP params, RCP> matrix, const mxArray* b, int& iters) { mxArray* output; - try - { + try { int matSize = A->getGlobalNumRows(); - //Define Tpetra vector/multivector types for convenience + // Define Tpetra vector/multivector types for convenience typedef Tpetra::Vector Tpetra_Vector; typedef Tpetra::MultiVector Tpetra_MultiVector; typedef Tpetra::Operator Tpetra_Operator; RCP> comm = Tpetra::getDefaultComm(); - //numGlobalIndices for map constructor is the number of rows in matrix/vectors, right? - RCP map = rcp(new muemex_map_type(matSize, (mm_GlobalOrd) 0, comm)); - RCP rhs = loadDataFromMatlab>>(b); - RCP lhs = rcp(new Tpetra_MultiVector(map, rhs->getNumVectors())); - //rhs is initialized, lhs is not - // Default params + // numGlobalIndices for map constructor is the number of rows in matrix/vectors, right? 
+ RCP map = rcp(new muemex_map_type(matSize, (mm_GlobalOrd)0, comm)); + RCP rhs = loadDataFromMatlab>>(b); + RCP lhs = rcp(new Tpetra_MultiVector(map, rhs->getNumVectors())); + // rhs is initialized, lhs is not + // Default params params->get("Output Frequency", 1); params->get("Output Style", Belos::Brief); @@ -275,66 +262,51 @@ mxArray* TpetraSystem::solve(RCP params, RCPget("Verbosity", Belos::Errors | Belos::Warnings | Belos::IterationDetails | Belos::Warnings | Belos::StatusTestDetails); #endif - //register all possible solvers + // register all possible solvers auto problem = rcp(new Belos::LinearProblem(matrix, lhs, rhs)); problem->setRightPrec(prec); - if(!problem->setProblem()) - { + if (!problem->setProblem()) { throw std::runtime_error("ERROR: failed to set up Belos problem."); } std::string solverName = "GMRES"; - if(params->isParameter("solver")) - { + if (params->isParameter("solver")) { solverName = params->template get("solver"); } - //Convert from basic MueMex solver names to the official Belos names. - //At the same time, check that solverName is in the valid set. + // Convert from basic MueMex solver names to the official Belos names. + // At the same time, check that solverName is in the valid set. std::string belosSolverName; - if(solverName == "GMRES") - { + if (solverName == "GMRES") { belosSolverName = "PseudoBlock GMRES"; - } - else if(solverName == "CG") - { + } else if (solverName == "CG") { belosSolverName = "PseudoBlock CG"; - } - else - { + } else { std::string msg = std::string("ERROR: requested solver \"") + solverName + "\" not supported. 
Currently supported solvers: CG, GMRES"; mexPrintf("%s\n", msg.c_str()); output = mxCreateDoubleScalar(0); return output; } - Teuchos::RCP > solver; + Teuchos::RCP> solver; Belos::SolverFactory factory; - try - { - //Just use the default parameters for the solver - solver = factory.create (belosSolverName, params); - } - catch(std::exception& e) - { + try { + // Just use the default parameters for the solver + solver = factory.create(belosSolverName, params); + } catch (std::exception& e) { mexPrintf("%s\n", e.what()); output = mxCreateDoubleScalar(0); return output; } solver->setProblem(problem); Belos::ReturnType ret = solver->solve(); - iters = solver->getNumIters(); - if(ret == Belos::Converged) - { + iters = solver->getNumIters(); + if (ret == Belos::Converged) { mexPrintf("Success, Belos converged!\n"); output = saveDataToMatlab(lhs); - } - else - { + } else { mexPrintf("Belos failed to converge.\n"); - iters = 0; + iters = 0; output = mxCreateDoubleScalar(0); } - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("Error occurred while running Belos solver:\n"); cout << e.what() << endl; output = mxCreateDoubleScalar(0); @@ -342,44 +314,35 @@ mxArray* TpetraSystem::solve(RCP params, RCP -mxArray* TpetraSystem::apply(const mxArray* r) -{ +template +mxArray* TpetraSystem::apply(const mxArray* r) { typedef Tpetra::MultiVector Tpetra_MultiVector; RCP rhs = loadDataFromMatlab>>(r); RCP lhs = rcp(new Tpetra_MultiVector(rhs->getMap(), rhs->getNumVectors())); - try - { + try { this->prec->apply(*rhs, *lhs); - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("Error occurred while applying MueLu-Tpetra preconditioner:\n"); cout << e.what() << endl; } return saveDataToMatlab(lhs); } -template<> -RCP getDatapackHierarchy(MuemexSystem* dp) -{ +template <> +RCP getDatapackHierarchy(MuemexSystem* dp) { RCP> hier; - switch(dp->type) - { - case EPETRA: - { - EpetraSystem* pack = (EpetraSystem*) dp; - hier = pack->getHierarchy(); + switch 
(dp->type) { + case EPETRA: { + EpetraSystem* pack = (EpetraSystem*)dp; + hier = pack->getHierarchy(); break; } - case TPETRA: - { - TpetraSystem* pack = (TpetraSystem*) dp; - hier = pack->getHierarchy(); + case TPETRA: { + TpetraSystem* pack = (TpetraSystem*)dp; + hier = pack->getHierarchy(); break; } - default: - { + default: { throw runtime_error("Got unexpected linear system type for real-valued functions."); } } @@ -387,113 +350,95 @@ RCP getDatapackHierarchy(MuemexSystem* dp) } #ifdef HAVE_COMPLEX_SCALARS -template<> RCP getDatapackHierarchy(MuemexSystem* dp) -{ - return ((TpetraSystem*) dp)->getHierarchy(); +template <> +RCP getDatapackHierarchy(MuemexSystem* dp) { + return ((TpetraSystem*)dp)->getHierarchy(); } #endif -template -void setHierarchyData(MuemexSystem* problem, int levelID, T& data, string& dataName) -{ +template +void setHierarchyData(MuemexSystem* problem, int levelID, T& data, string& dataName) { RCP level; - if(problem->type == EPETRA) - { - RCP> hier = ((EpetraSystem*) problem)->getHierarchy(); - level = hier->GetLevel(levelID); - } - else if(problem->type == TPETRA) - { - RCP> hier = ((TpetraSystem*) problem)->getHierarchy(); - level = hier->GetLevel(levelID); - } - else if(problem->type == TPETRA_COMPLEX) - { + if (problem->type == EPETRA) { + RCP> hier = ((EpetraSystem*)problem)->getHierarchy(); + level = hier->GetLevel(levelID); + } else if (problem->type == TPETRA) { + RCP> hier = ((TpetraSystem*)problem)->getHierarchy(); + level = hier->GetLevel(levelID); + } else if (problem->type == TPETRA_COMPLEX) { #ifdef HAVE_COMPLEX_SCALARS - RCP> hier = ((TpetraSystem*) problem)->getHierarchy(); - level = hier->GetLevel(levelID); + RCP> hier = ((TpetraSystem*)problem)->getHierarchy(); + level = hier->GetLevel(levelID); #else throw std::runtime_error("setHierarchyData(): complex scalars not supported."); #endif } - if(level.is_null()) + if (level.is_null()) throw runtime_error("Error getting level when setting custom level data."); 
level->Set(dataName, data); level->AddKeepFlag(dataName, NoFactory::get(), UserData); } -//data pack base class implementation +// data pack base class implementation -MuemexSystem::MuemexSystem(DataPackType probType) : id(MUEMEX_ERROR), type(probType) {} +MuemexSystem::MuemexSystem(DataPackType probType) + : id(MUEMEX_ERROR) + , type(probType) {} MuemexSystem::~MuemexSystem() {} -mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, int levelID) -{ +mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, int levelID) { mxArray* output = NULL; - try - { - //First, get Level, which doesn't depend on Epetra vs. Tpetra + try { + // First, get Level, which doesn't depend on Epetra vs. Tpetra RCP level; RCP fmb; - if(this->type == TPETRA) - { - TpetraSystem* tsys = (TpetraSystem*) this; - if(tsys->keepAll) + if (this->type == TPETRA) { + TpetraSystem* tsys = (TpetraSystem*)this; + if (tsys->keepAll) fmb = tsys->systemManagers[levelID]; - } - else if(this->type == TPETRA_COMPLEX) - { + } else if (this->type == TPETRA_COMPLEX) { #ifdef HAVE_COMPLEX_SCALARS - TpetraSystem* tsys = (TpetraSystem*) this; - if(tsys->keepAll) + TpetraSystem* tsys = (TpetraSystem*)this; + if (tsys->keepAll) fmb = tsys->systemManagers[levelID]; #else throw std::runtime_error("getHierarchyData(): complex scalars not supported."); #endif } - const FactoryBase* factory = NoFactory::get(); //(ptr to constant) - bool needFMB = true; - if(dataName == "A" || dataName == "P") //these are kept by default, don't use actual factory pointer - //Otherwise would break getting A and P when 'keep' is off + const FactoryBase* factory = NoFactory::get(); //(ptr to constant) + bool needFMB = true; + if (dataName == "A" || dataName == "P") // these are kept by default, don't use actual factory pointer + // Otherwise would break getting A and P when 'keep' is off needFMB = false; - switch(this->type) - { + switch (this->type) { case EPETRA: - case TPETRA: - { + case 
TPETRA: { RCP> hier = rcp_static_cast>(getDatapackHierarchy(this)); - level = hier->GetLevel(levelID); - if(needFMB) - { - if(fmb.is_null()) - fmb = (RCP) hier->GetFactoryManager(levelID); - if(!fmb.is_null()) - { - try - { + level = hier->GetLevel(levelID); + if (needFMB) { + if (fmb.is_null()) + fmb = (RCP)hier->GetFactoryManager(levelID); + if (!fmb.is_null()) { + try { factory = fmb->GetFactory(dataName).get(); - } - catch(exception& e) {} //forced to try using NoFactory (which will work with default keeps A, P) + } catch (exception& e) { + } // forced to try using NoFactory (which will work with default keeps A, P) } } break; } - case TPETRA_COMPLEX: - { + case TPETRA_COMPLEX: { #ifdef HAVE_COMPLEX_SCALARS RCP> hier = rcp_static_cast>(getDatapackHierarchy(this)); - level = hier->GetLevel(levelID); - if(needFMB) - { - if(fmb.is_null()) - fmb = (RCP) hier->GetFactoryManager(levelID); - if(!fmb.is_null()) - { - try - { + level = hier->GetLevel(levelID); + if (needFMB) { + if (fmb.is_null()) + fmb = (RCP)hier->GetFactoryManager(levelID); + if (!fmb.is_null()) { + try { factory = fmb->GetFactory(dataName).get(); - } - catch(exception& e) {} //attempt to use NoFactory + } catch (exception& e) { + } // attempt to use NoFactory } } break; @@ -502,70 +447,58 @@ mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, in #endif } } - if(level.is_null()) + if (level.is_null()) throw runtime_error("Can't get level data because level is null."); bool dataIsAvailable = level->IsAvailable(dataName, factory); - if(!dataIsAvailable) - { - //Give the level the FactoryManager again so it can provide the data (by re-creating it, if necessary) + if (!dataIsAvailable) { + // Give the level the FactoryManager again so it can provide the data (by re-creating it, if necessary) level->SetFactoryManager(fmb); } - //Given the dataName and factory pointer, all data in the level should now be accessible - switch(dataType) - { + // Given the dataName and factory 
pointer, all data in the level should now be accessible + switch (dataType) { case XPETRA_MATRIX_DOUBLE: return saveDataToMatlab(level->Get>(dataName, factory)); - case XPETRA_MATRIX_COMPLEX: - { + case XPETRA_MATRIX_COMPLEX: { #ifdef HAVE_COMPLEX_SCALARS return saveDataToMatlab(level->Get>(dataName, factory)); #endif } case XPETRA_MULTIVECTOR_DOUBLE: - if(dataName == "Coordinates") - { - //Coordinates is special because it's always user-provided on level 0, not always provided at all, not always kept in the level (only kept if doing agg viz, etc), and is always MV regardless of problem scalar type + if (dataName == "Coordinates") { + // Coordinates is special because it's always user-provided on level 0, not always provided at all, not always kept in the level (only kept if doing agg viz, etc), and is always MV regardless of problem scalar type double errReturn = -1; - if(level->GetLevelID() == 0) - { - //Try to get coordinates as if it's user data, but don't be surprised if it's not there at all. - try - { + if (level->GetLevelID() == 0) { + // Try to get coordinates as if it's user data, but don't be surprised if it's not there at all. + try { RCP coords = level->Get>(dataName, NoFactory::get()); - if(coords.is_null()) - throw runtime_error("Coordinates were not available (Level 0)."); //just print the message below and return -1 + if (coords.is_null()) + throw runtime_error("Coordinates were not available (Level 0)."); // just print the message below and return -1 return saveDataToMatlab(coords); - } - catch(exception& e) - { - cout << endl << "Coordinates were not available on Level 0." << endl; + } catch (exception& e) { + cout << endl + << "Coordinates were not available on Level 0." << endl; cout << "They must be provided by the user and aren't generated or kept by default (even in MueMex 'keep' mode)." 
<< endl; cout << "User-provided coordinates for Level 0 will be kept and passed to other levels if something requires them:" << endl; - cout << "aggregate visualization, brick aggregation, repartitioning or distance laplacian filtering." << endl << endl; + cout << "aggregate visualization, brick aggregation, repartitioning or distance laplacian filtering." << endl + << endl; return saveDataToMatlab(errReturn); } - } - else - { - //If coords are provided & kept, they are produced by CoordinatesTransferFactory in levels > 0. - try - { + } else { + // If coords are provided & kept, they are produced by CoordinatesTransferFactory in levels > 0. + try { RCP coords = level->Get>(dataName, factory); - if(coords.is_null()) + if (coords.is_null()) throw runtime_error("Coordinates were not available (Level > 0)."); return saveDataToMatlab(coords); - } - catch(exception& e) - { + } catch (exception& e) { cout << "Coordinates must be provided by the user and aren't generated or kept by default (even in MueMex 'keep' mode)." << endl; cout << "User-provided coordinates for Level 0 will be kept and passed to other levels if something requires them:" << endl; - cout << "aggregate visualization, brick aggregation, repartitioning or distance laplacian filtering." << endl << endl; + cout << "aggregate visualization, brick aggregation, repartitioning or distance laplacian filtering." 
<< endl + << endl; return saveDataToMatlab(errReturn); } } - } - else - { + } else { return saveDataToMatlab(level->Get>(dataName, factory)); } case XPETRA_MULTIVECTOR_COMPLEX: @@ -590,61 +523,53 @@ mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, in default: throw runtime_error("Invalid MuemexType for getting hierarchy data."); } - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("Error occurred while getting hierarchy data.\n"); cout << e.what() << endl; } return output; } -//EpetraSystem impl +// EpetraSystem impl -EpetraSystem::EpetraSystem() : MuemexSystem(EPETRA) {} +EpetraSystem::EpetraSystem() + : MuemexSystem(EPETRA) {} EpetraSystem::~EpetraSystem() {} -int EpetraSystem::status() -{ +int EpetraSystem::status() { mexPrintf("**** Problem ID %d [MueLu_Epetra] ****\n", id); - if(!A.is_null()) + if (!A.is_null()) mexPrintf("Matrix: %dx%d w/ %d nnz\n", A->NumGlobalRows(), A->NumGlobalCols(), A->NumMyNonzeros()); mexPrintf("Operator Complexity: %f\n", operatorComplexity); - if(!List.is_null()) - { - mexPrintf("Parameter List:\n"); - List->print(); - } + if (!List.is_null()) { + mexPrintf("Parameter List:\n"); + List->print(); + } mexPrintf("\n"); return IS_TRUE; -}/*end status*/ +} /*end status*/ -int EpetraSystem::setup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) -{ +int EpetraSystem::setup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) { bool success = false; - try - { - /* Matrix Fill */ - A = loadDataFromMatlab>(matlabA); - if(haveCoords) - { - //Create 'user data' sublist if it doesn't already exist - auto userData = Teuchos::sublist(List, "user data"); - userData->set("Coordinates", loadDataFromMatlab>(matlabCoords)); - } - prec = MueLu::CreateEpetraPreconditioner(A, *List); - //underlying the Epetra_Operator prec is a MueLu::EpetraOperator - RCP meo = rcp_static_cast(prec); - operatorComplexity = meo->GetHierarchy()->GetOperatorComplexity(); - success = true; - } 
- catch(exception& e) - { - mexPrintf("Error occurred while setting up epetra problem:\n"); - cout << e.what() << endl; + try { + /* Matrix Fill */ + A = loadDataFromMatlab>(matlabA); + if (haveCoords) { + // Create 'user data' sublist if it doesn't already exist + auto userData = Teuchos::sublist(List, "user data"); + userData->set("Coordinates", loadDataFromMatlab>(matlabCoords)); } + prec = MueLu::CreateEpetraPreconditioner(A, *List); + // underlying the Epetra_Operator prec is a MueLu::EpetraOperator + RCP meo = rcp_static_cast(prec); + operatorComplexity = meo->GetHierarchy()->GetOperatorComplexity(); + success = true; + } catch (exception& e) { + mexPrintf("Error occurred while setting up epetra problem:\n"); + cout << e.what() << endl; + } return success ? IS_TRUE : IS_FALSE; -}/*end setup*/ +} /*end setup*/ /* EpetraSystem::solve - Given two Teuchos lists, one in the EpetraSystem, and one of solve-time options, this routine calls the relevant solver and returns the solution. @@ -655,13 +580,11 @@ int EpetraSystem::setup(const mxArray* matlabA, bool haveCoords, const mxArray* iters - number of iterations taken [O] Returns: IS_TRUE if solve was succesful, IS_FALSE otherwise */ -mxArray* EpetraSystem::solve(RCP TPL, RCP matrix, const mxArray* b, int& iters) -{ +mxArray* EpetraSystem::solve(RCP TPL, RCP matrix, const mxArray* b, int& iters) { mxArray* output; - try - { - //Set up X and B - Epetra_Map map = matrix->DomainMap(); + try { + // Set up X and B + Epetra_Map map = matrix->DomainMap(); RCP rhs = loadDataFromMatlab>(b); RCP lhs = rcp(new Epetra_MultiVector(map, rhs->NumVectors(), true)); // Default params @@ -672,33 +595,28 @@ mxArray* EpetraSystem::solve(RCP TPL, RCP matri #else TPL->get("Verbosity", Belos::Errors + Belos::Warnings + Belos::IterationDetails + Belos::Warnings + Belos::StatusTestDetails); #endif - RCP> problem = rcp(new Belos::LinearProblem(matrix, lhs, rhs)); - RCP epo = rcp(new Belos::EpetraPrecOp(prec)); + RCP> problem = rcp(new 
Belos::LinearProblem(matrix, lhs, rhs)); + RCP epo = rcp(new Belos::EpetraPrecOp(prec)); problem->setRightPrec(epo); bool set = problem->setProblem(); TEUCHOS_TEST_FOR_EXCEPTION(!set, runtime_error, "Linear Problem failed to set up correctly!"); Belos::SolverFactory factory; - //Get the solver name from the parameter list, default to PseudoBlockGmres if none specified by user - string solverName = TPL->get("solver", "GMRES"); + // Get the solver name from the parameter list, default to PseudoBlockGmres if none specified by user + string solverName = TPL->get("solver", "GMRES"); RCP> solver = factory.create(solverName, TPL); solver->setProblem(problem); Belos::ReturnType ret = solver->solve(); - if(ret == Belos::Converged) - { + if (ret == Belos::Converged) { mexPrintf("Success, Belos converged!\n"); - iters = solver->getNumIters(); + iters = solver->getNumIters(); output = saveDataToMatlab(lhs); - } - else - { + } else { mexPrintf("Belos failed to converge.\n"); - iters = 0; + iters = 0; output = mxCreateDoubleScalar(0); } output = saveDataToMatlab(lhs); - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("Error occurred during Belos solve:\n"); cout << e.what() << endl; output = mxCreateDoubleScalar(0); @@ -706,65 +624,53 @@ mxArray* EpetraSystem::solve(RCP TPL, RCP matri return output; } -mxArray* EpetraSystem::apply(const mxArray* r) -{ +mxArray* EpetraSystem::apply(const mxArray* r) { RCP rhs = loadDataFromMatlab>(r); Epetra_SerialComm Comm; Epetra_Map map(rhs->GlobalLength(), 0, Comm); RCP lhs = rcp(new Epetra_MultiVector(map, rhs->NumVectors(), true)); - try - { + try { this->prec->Apply(*rhs, *lhs); - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("Error occurred while applying MueLu-Epetra preconditioner:\n"); cout << e.what() << endl; } return saveDataToMatlab(lhs); } -RCP EpetraSystem::getHierarchy() -{ - RCP meo = rcp_static_cast(prec); +RCP EpetraSystem::getHierarchy() { + RCP meo = rcp_static_cast(prec); RCP> hier = 
meo->GetHierarchy(); - if(hier.is_null()) + if (hier.is_null()) throw runtime_error("Hierarchy from Epetra problem was null."); return hier; } -//tpetra_double_data_pack implementation +// tpetra_double_data_pack implementation -template<> TpetraSystem::TpetraSystem() : MuemexSystem(TPETRA) {} -template<> TpetraSystem::~TpetraSystem() {} +template <> +TpetraSystem::TpetraSystem() + : MuemexSystem(TPETRA) {} +template <> +TpetraSystem::~TpetraSystem() {} -template -int TpetraSystem::setup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) -{ - //decide whether do do default or custom setup +template +int TpetraSystem::setup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) { + // decide whether do do default or custom setup bool doCustomSetup = List->isParameter("keep") && List->isType("keep") && List->get("keep"); - List->remove("keep", false); //"keep" would cause Plist validation to fail if left in - if(doCustomSetup) - { - try - { + List->remove("keep", false); //"keep" would cause Plist validation to fail if left in + if (doCustomSetup) { + try { customSetup(matlabA, haveCoords, matlabCoords); - } - catch(exception& e) - { + } catch (exception& e) { cout << "An error occurred during Tpetra custom problem setup:" << endl; cout << e.what() << endl; return IS_FALSE; } - } - else - { - try - { + } else { + try { normalSetup(matlabA, haveCoords, matlabCoords); - } - catch(exception& e) - { + } catch (exception& e) { cout << "An error occurred during Tpetra preconditioner setup:" << endl; cout << e.what(); return IS_FALSE; @@ -773,50 +679,38 @@ int TpetraSystem::setup(const mxArray* matlabA, bool haveCoords, const m return IS_TRUE; } -template -void TpetraSystem::normalSetup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) -{ +template +void TpetraSystem::normalSetup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) { keepAll = false; - A = loadDataFromMatlab>>(matlabA); - RCP > opA(A); + 
A = loadDataFromMatlab>>(matlabA); + RCP> opA(A); RCP> mop; - if(haveCoords) - { + if (haveCoords) { auto userData = Teuchos::sublist(List, "user data"); userData->set("Coordinates", loadDataFromMatlab>(matlabCoords)); } - //Create the nullspace if not already set by user through XML - if(!(List->isSublist("level 0") && List->sublist("level 0", true).isParameter("Nullspace")) - && !(List->isSublist("user data") && List->sublist("user data", true).isParameter("Nullspace"))) - { + // Create the nullspace if not already set by user through XML + if (!(List->isSublist("level 0") && List->sublist("level 0", true).isParameter("Nullspace")) && !(List->isSublist("user data") && List->sublist("user data", true).isParameter("Nullspace"))) { int nPDE = MasterList::getDefault("number of equations"); - if (List->isSublist("Matrix")) - { + if (List->isSublist("Matrix")) { // Factory style parameter list const Teuchos::ParameterList& operatorList = List->sublist("Matrix"); if (operatorList.isParameter("PDE equations")) nPDE = operatorList.get("PDE equations"); - } - else if (List->isParameter("number of equations")) - { + } else if (List->isParameter("number of equations")) { // Easy style parameter list nPDE = List->get("number of equations"); } mexPrintf("** Constructing nullspace for %d PDEs\n", nPDE); auto domainMap = A->getDomainMap(); auto nullspace = rcp(new Tpetra::MultiVector(domainMap, nPDE)); - if (nPDE == 1) - { + if (nPDE == 1) { nullspace->putScalar(Teuchos::ScalarTraits::one()); - } - else - { + } else { typedef typename Teuchos::ArrayRCP::size_type arrayRCPSizeType; - for (int i = 0; i < nPDE; i++) - { + for (int i = 0; i < nPDE; i++) { Teuchos::ArrayRCP nsData = nullspace->getDataNonConst(i); - for (arrayRCPSizeType j = 0; j < nsData.size(); j++) - { + for (arrayRCPSizeType j = 0; j < nsData.size(); j++) { mm_GlobalOrd GID = domainMap->getGlobalElement(j) - domainMap->getIndexBase(); if ((GID - i) % nPDE == 0) nsData[j] = Teuchos::ScalarTraits::one(); @@ -826,69 
+720,58 @@ void TpetraSystem::normalSetup(const mxArray* matlabA, bool haveCoords, auto userData = Teuchos::sublist(List, "user data"); userData->set("Nullspace", nullspace); } - mop = MueLu::CreateTpetraPreconditioner(opA, *List); + mop = MueLu::CreateTpetraPreconditioner(opA, *List); prec = rcp_implicit_cast>(mop); // print data?? - //mop->GetHierarchy()->GetLevel(0)->print(std::cout, MueLu::Debug); + // mop->GetHierarchy()->GetLevel(0)->print(std::cout, MueLu::Debug); operatorComplexity = mop->GetHierarchy()->GetOperatorComplexity(); } -template -void TpetraSystem::customSetup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) -{ +template +void TpetraSystem::customSetup(const mxArray* matlabA, bool haveCoords, const mxArray* matlabCoords) { keepAll = true; - A = loadDataFromMatlab>>(matlabA); + A = loadDataFromMatlab>>(matlabA); RCP> mop; - //Now modify CreateTpetraPreconditioner to set keep flags on all factories + // Now modify CreateTpetraPreconditioner to set keep flags on all factories typedef Xpetra::MultiVector MultiVector; typedef Xpetra::Matrix Matrix; typedef Hierarchy Hierarchy; typedef HierarchyManager HierarchyManager; RCP mueluFactory = rcp(new ParameterListInterpreter(*List, A->getComm())); - RCP H = mueluFactory->CreateHierarchy(); + RCP H = mueluFactory->CreateHierarchy(); H->setlib(Xpetra::UseTpetra); RCP xA = TpetraCrs_To_XpetraMatrix(A); H->GetLevel(0)->Set("A", xA); - if(haveCoords) - { + if (haveCoords) { RCP> coords = loadDataFromMatlab>(matlabCoords); H->GetLevel(0)->Set("Coordinates", coords); } - //Decide whether user passed level 0 Nullspace in parameter list. If not, make it here. - if(!List->isSublist("level 0") || !List->sublist("level 0", true).isParameter("Nullspace")) - { + // Decide whether user passed level 0 Nullspace in parameter list. If not, make it here. 
+ if (!List->isSublist("level 0") || !List->sublist("level 0", true).isParameter("Nullspace")) { int nPDE = MasterList::getDefault("number of equations"); - if (List->isSublist("Matrix")) - { + if (List->isSublist("Matrix")) { // Factory style parameter list const Teuchos::ParameterList& operatorList = List->sublist("Matrix"); if (operatorList.isParameter("PDE equations")) nPDE = operatorList.get("PDE equations"); - } - else if (List->isParameter("number of equations")) - { + } else if (List->isParameter("number of equations")) { // Easy style parameter list nPDE = List->get("number of equations"); } RCP nullspace = Xpetra::MultiVectorFactory::Build(xA->getDomainMap(), nPDE); - if (nPDE == 1) - { + if (nPDE == 1) { nullspace->putScalar(Teuchos::ScalarTraits::one()); - } - else - { + } else { typedef typename Teuchos::ArrayRCP::size_type arrayRCPSizeType; - for (int i = 0; i < nPDE; i++) - { + for (int i = 0; i < nPDE; i++) { Teuchos::ArrayRCP nsData = nullspace->getDataNonConst(i); - for (arrayRCPSizeType j = 0; j < nsData.size(); j++) - { - //TODO optimizations: - //TODO This can be optimized by getting the domain map and index base outside the loop. - //TODO Also, the whole local-to-global lookup table can be fetched one time, instead of repeatedly - //TODO calling getGlobalElement. + for (arrayRCPSizeType j = 0; j < nsData.size(); j++) { + // TODO optimizations: + // TODO This can be optimized by getting the domain map and index base outside the loop. + // TODO Also, the whole local-to-global lookup table can be fetched one time, instead of repeatedly + // TODO calling getGlobalElement. 
mm_GlobalOrd GID = A->getDomainMap()->getGlobalElement(j) - A->getDomainMap()->getIndexBase(); if ((GID - i) % nPDE == 0) nsData[j] = Teuchos::ScalarTraits::one(); @@ -900,99 +783,93 @@ void TpetraSystem::customSetup(const mxArray* matlabA, bool haveCoords, Teuchos::ParameterList nonSerialList, dummyList; ExtractNonSerializableData(*List, dummyList, nonSerialList); HierarchyUtils::AddNonSerializableDataToHierarchy(*mueluFactory, *H, nonSerialList); - //Set up dummy levels in hierarchy - for(int i = 0; i < 5; i++) - { + // Set up dummy levels in hierarchy + for (int i = 0; i < 5; i++) { RCP l = rcp(new Level()); H->AddLevel(l); } - //Set keep flags on ALL factories in ALL levels - //We have access to H's list of FactoryManagers so we know how to get factory pointer given the name of the factory. - //We don't know which names are in the FactoryManagers though so a brute force approach is needed... - vector keepItems = {"A", "P", "R", "Ptent", "Aggregates", "Coordinates", "UnAmalgamationInfo", "Smoother", "PreSmoother", "PostSmoother", "CoarseSolver", "Graph", "CoarseMap", "Nullspace", "Ppattern", "Constraint", "CoarseNumZLayers", "LineDetection_Layers", "LineDetection_VertLineIds", "Partition", "Importer", "DofsPerNode", "Filtering" "pcoarsen: element to node map"}; + // Set keep flags on ALL factories in ALL levels + // We have access to H's list of FactoryManagers so we know how to get factory pointer given the name of the factory. + // We don't know which names are in the FactoryManagers though so a brute force approach is needed... 
+ vector keepItems = {"A", "P", "R", "Ptent", "Aggregates", "Coordinates", "UnAmalgamationInfo", "Smoother", "PreSmoother", "PostSmoother", "CoarseSolver", "Graph", "CoarseMap", "Nullspace", "Ppattern", "Constraint", "CoarseNumZLayers", "LineDetection_Layers", "LineDetection_VertLineIds", "Partition", "Importer", "DofsPerNode", + "Filtering" + "pcoarsen: element to node map"}; RCP> openH = rcp_static_cast, Hierarchy>(H); - if(openH.is_null()) + if (openH.is_null()) throw runtime_error("Could not cast RCP to subclass."); - for(int lvl = 0; lvl < H->GetNumLevels(); lvl++) - { - RCP fman = (RCP) mueluFactory->GetFactoryManager(lvl); + for (int lvl = 0; lvl < H->GetNumLevels(); lvl++) { + RCP fman = (RCP)mueluFactory->GetFactoryManager(lvl); systemManagers.push_back(fman); - for(auto s : keepItems) - { - try - { - const RCP fact = fman->GetFactory(s); //will throw if factory doesn't exist, ignore in that case - if(!fact.is_null()) - { - FactoryBase* factPtr = (FactoryBase*) fact.get(); - //Add keep flag to level + for (auto s : keepItems) { + try { + const RCP fact = fman->GetFactory(s); // will throw if factory doesn't exist, ignore in that case + if (!fact.is_null()) { + FactoryBase* factPtr = (FactoryBase*)fact.get(); + // Add keep flag to level H->GetLevel(lvl)->Keep(s, factPtr); } + } catch (exception& e) { } - catch(exception& e) {} } } mueluFactory->SetupHierarchy(*H); operatorComplexity = H->GetOperatorComplexity(); - prec = rcp(new TpetraOperator(H)); + prec = rcp(new TpetraOperator(H)); } -template<> -int TpetraSystem::status() -{ +template <> +int TpetraSystem::status() { mexPrintf("**** Problem ID %d [MueLu_Tpetra] ****\n", id); - if(!A.is_null()) + if (!A.is_null()) mexPrintf("Matrix: %dx%d w/ %d nnz\n", A->getGlobalNumRows(), A->getGlobalNumCols(), A->getGlobalNumEntries()); mexPrintf("Operator Complexity: %f\n", operatorComplexity); - if(!List.is_null()) - { - mexPrintf("Parameter List:\n"); - List->print(); - } + if (!List.is_null()) { + 
mexPrintf("Parameter List:\n"); + List->print(); + } mexPrintf("\n"); return IS_TRUE; } -template -RCP> TpetraSystem::getHierarchy() -{ +template +RCP> TpetraSystem::getHierarchy() { RCP> mueluOp = rcp_static_cast, Tpetra::Operator>(prec); - if(mueluOp.is_null()) + if (mueluOp.is_null()) throw runtime_error("Tpetra precondition operator was null."); RCP> hier = mueluOp->GetHierarchy(); - if(hier.is_null()) + if (hier.is_null()) throw runtime_error("Hierarchy from Tpetra problem was null."); return hier; } -//tpetra_complex_data_pack implementation +// tpetra_complex_data_pack implementation #ifdef HAVE_COMPLEX_SCALARS -template<> TpetraSystem::TpetraSystem() : MuemexSystem(TPETRA_COMPLEX) {} -template<> TpetraSystem::~TpetraSystem() {} - -template<> -int TpetraSystem::status() -{ +template <> +TpetraSystem::TpetraSystem() + : MuemexSystem(TPETRA_COMPLEX) {} +template <> +TpetraSystem::~TpetraSystem() {} + +template <> +int TpetraSystem::status() { mexPrintf("**** Problem ID %d [MueLu_Tpetra (Complex Scalars)] ****\n", id); - if(!A.is_null()) + if (!A.is_null()) mexPrintf("Matrix: %dx%d w/ %d nnz\n", A->getGlobalNumRows(), A->getGlobalNumCols(), A->getGlobalNumEntries()); mexPrintf("Operator Complexity: %f\n", operatorComplexity); - if(!List.is_null()) - { + if (!List.is_null()) { mexPrintf("Parameter List:\n"); List->print(); } mexPrintf("\n"); return IS_TRUE; } -#endif //HAVE_COMPLEX_SCALARS +#endif // HAVE_COMPLEX_SCALARS -//MuemexSystemList namespace implementation +// MuemexSystemList namespace implementation -void MuemexSystemList::clearAll() -{ - //When items are cleared, RCPs will auto-delete the datapacks +void MuemexSystemList::clearAll() { + // When items are cleared, RCPs will auto-delete the datapacks list.clear(); } @@ -1001,14 +878,13 @@ void MuemexSystemList::clearAll() D - The MuemexSystem. 
[I] Returns: problem id number of D */ -int MuemexSystemList::add(RCP D) -{ +int MuemexSystemList::add(RCP D) { TEUCHOS_ASSERT(!D.is_null()); D->id = nextID; nextID++; list.push_back(D); return D->id; -} /*end add*/ +} /*end add*/ /* find - Finds problem by id Parameters: @@ -1016,78 +892,63 @@ int MuemexSystemList::add(RCP D) Returns: pointer to MuemexSystem matching 'id', if found, NULL if not found. */ -RCP MuemexSystemList::find(int id) -{ - if(isInList(id)) - { - for(auto problem : list) - { - if(problem->id == id) +RCP MuemexSystemList::find(int id) { + if (isInList(id)) { + for (auto problem : list) { + if (problem->id == id) return problem; } } RCP notFound; return notFound; -}/*end find*/ +} /*end find*/ /* remove - Removes problem by id Parameters: id - ID number [I] Returns: IS_TRUE if remove was succesful, IS_FALSE otherwise */ -int MuemexSystemList::remove(int id) -{ +int MuemexSystemList::remove(int id) { int index = -1; - for(int i = 0; i < int(list.size()); i++) - { - if(list[i]->id == id) - { - index = i; - break; - } - } - if(index == -1) - { - mexErrMsgTxt("Error: Tried to clean up a problem that doesn't exist."); - return IS_FALSE; + for (int i = 0; i < int(list.size()); i++) { + if (list[i]->id == id) { + index = i; + break; } + } + if (index == -1) { + mexErrMsgTxt("Error: Tried to clean up a problem that doesn't exist."); + return IS_FALSE; + } list.erase(list.begin() + index); return IS_TRUE; -}/*end remove*/ +} /*end remove*/ /* size - Number of stored problems */ -int MuemexSystemList::size() -{ +int MuemexSystemList::size() { return list.size(); } /* Returns the status of all members of the list Returns IS_TRUE */ -int MuemexSystemList::status_all() -{ - //This prints all the existing problems in ascending order by ID - for(int i = 0; i < nextID; i++) - { - for(auto problem : list) - { - if(problem->id == i) - { - problem->status(); - break; - } - } +int MuemexSystemList::status_all() { + // This prints all the existing problems in 
ascending order by ID + for (int i = 0; i < nextID; i++) { + for (auto problem : list) { + if (problem->id == i) { + problem->status(); + break; + } } + } return IS_TRUE; -}/*end status_all */ +} /*end status_all */ -bool MuemexSystemList::isInList(int id) -{ +bool MuemexSystemList::isInList(int id) { bool rv = false; - for(auto problem : list) - { - if(problem->id == id) - { + for (auto problem : list) { + if (problem->id == id) { rv = true; break; } @@ -1103,107 +964,97 @@ bool MuemexSystemList::isInList(int id) Return value: Which mode to run the program in. */ -MODE_TYPE sanity_check(int nrhs, const mxArray *prhs[]) -{ +MODE_TYPE sanity_check(int nrhs, const mxArray* prhs[]) { MODE_TYPE rv = MODE_ERROR; /* Check for mode */ - if(nrhs == 0) + if (nrhs == 0) mexErrMsgTxt("Error: muelu() expects at least one argument\n"); /* Pull mode data from 1st Input */ - MODE_TYPE mode = (MODE_TYPE) loadDataFromMatlab(prhs[0]); - switch (mode) - { + MODE_TYPE mode = (MODE_TYPE)loadDataFromMatlab(prhs[0]); + switch (mode) { case MODE_SETUP: - if(nrhs > 1 && mxIsSparse(prhs[1])) - { - if(nrhs > 3 && mxIsSparse(prhs[2]) && mxIsSparse(prhs[3])) + if (nrhs > 1 && mxIsSparse(prhs[1])) { + if (nrhs > 3 && mxIsSparse(prhs[2]) && mxIsSparse(prhs[3])) rv = MODE_ERROR; else rv = MODE_SETUP; - } - else - { + } else { mexErrMsgTxt("Error: Invalid input for setup\n"); } break; case MODE_SOLVE: - //problem ID and matrix or rhs must be numeric - if(nrhs >= 2 && mxIsNumeric(prhs[1]) && mxIsNumeric(prhs[2])) + // problem ID and matrix or rhs must be numeric + if (nrhs >= 2 && mxIsNumeric(prhs[1]) && mxIsNumeric(prhs[2])) rv = MODE_SOLVE; else mexErrMsgTxt("Error: Invalid input for solve\n"); break; case MODE_APPLY: - //problem ID and RHS must be numeric - if(nrhs == 3 && mxIsNumeric(prhs[1]) && mxIsNumeric(prhs[2])) + // problem ID and RHS must be numeric + if (nrhs == 3 && mxIsNumeric(prhs[1]) && mxIsNumeric(prhs[2])) rv = MODE_APPLY; else mexErrMsgTxt("Error: Invalid input for apply\n"); 
break; case MODE_CLEANUP: - if(nrhs == 1 || nrhs == 2) + if (nrhs == 1 || nrhs == 2) rv = MODE_CLEANUP; else mexErrMsgTxt("Error: Extraneous args for cleanup\n"); break; case MODE_STATUS: - if(nrhs == 1 || nrhs == 2) + if (nrhs == 1 || nrhs == 2) rv = MODE_STATUS; else mexErrMsgTxt("Error: Extraneous args for status\n"); break; case MODE_AGGREGATE: - if(nrhs > 1 && mxIsSparse(prhs[1])) - //Uncomment the next line and remove one after when implementing aggregate mode - //rv = MODE_AGGREGATE; + if (nrhs > 1 && mxIsSparse(prhs[1])) + // Uncomment the next line and remove one after when implementing aggregate mode + // rv = MODE_AGGREGATE; rv = MODE_ERROR; else mexErrMsgTxt("Error: Invalid input for aggregate\n"); break; case MODE_GET: - if(nrhs < 4 || nrhs > 5) + if (nrhs < 4 || nrhs > 5) mexErrMsgTxt("Error: Wrong number of args for get\n"); else rv = MODE_GET; break; default: - printf("Mode number = %d\n", (int) mode); + printf("Mode number = %d\n", (int)mode); mexErrMsgTxt("Error: Invalid input mode\n"); - }; + }; return rv; } /*end sanity_check*/ -void csc_print(int n, int* rowind, int* colptr, double* vals) -{ +void csc_print(int n, int* rowind, int* colptr, double* vals) { int i, j; - for(i = 0; i < n; i++) - { - for(j = colptr[i]; j < colptr[i + 1]; j++) - { + for (i = 0; i < n; i++) { + for (j = colptr[i]; j < colptr[i + 1]; j++) { mexPrintf("%d %d %20.16e\n", rowind[j], i, vals[j]); } } } -void parse_list_item(RCP List, char *option_name, const mxArray *prhs) -{ - //List shouldn't be NULL but if it is, initialize here - if(List.is_null()) - { +void parse_list_item(RCP List, char* option_name, const mxArray* prhs) { + // List shouldn't be NULL but if it is, initialize here + if (List.is_null()) { List = rcp(new ParameterList); } mxClassID cid; int i, M, N, *opt_int; - char *opt_char; - double *opt_float; + char* opt_char; + double* opt_float; string opt_str; RCP sublist = rcp(new ParameterList); mxArray *cell1, *cell2; /* Pull relevant info the the option 
value */ cid = mxGetClassID(prhs); - M = mxGetM(prhs); - N = mxGetN(prhs); + M = mxGetM(prhs); + N = mxGetN(prhs); /* Add to the Teuchos list */ // extract potential typeStr. The code is based on the assumption that @@ -1211,24 +1062,22 @@ void parse_list_item(RCP List, char *option_name, const mxArray * // between "map" type (representing a Xpetra::Map) and multivector (default) vector typestring = tokenizeList(option_name); std::transform(typestring[0].begin(), typestring[0].end(), typestring[0].begin(), ::tolower); - size_t WordStart = typestring[0].find_first_not_of(' '); - size_t WordEnd = typestring[0].find(' ', WordStart); + size_t WordStart = typestring[0].find_first_not_of(' '); + size_t WordEnd = typestring[0].find(' ', WordStart); std::string typeStr = typestring[0].substr(WordStart, WordEnd - WordStart); ///// - switch(cid) - { + switch (cid) { case mxCHAR_CLASS: // String opt_char = mxArrayToString(prhs); - opt_str = opt_char; + opt_str = opt_char; List->set(option_name, opt_str); - if(strcmp(option_name, MUEMEX_INTERFACE) == 0) - { - if(strcmp(opt_str.c_str(), "epetra") == 0) + if (strcmp(option_name, MUEMEX_INTERFACE) == 0) { + if (strcmp(opt_str.c_str(), "epetra") == 0) useEpetra = true; - else if(strcmp(opt_str.c_str(), "tpetra") == 0) + else if (strcmp(opt_str.c_str(), "tpetra") == 0) useEpetra = false; } mxFree(opt_char); @@ -1236,23 +1085,17 @@ void parse_list_item(RCP List, char *option_name, const mxArray * case mxDOUBLE_CLASS: case mxSINGLE_CLASS: // Single or double, real or complex - if(mxIsComplex(prhs)) - { + if (mxIsComplex(prhs)) { #ifndef HAVE_COMPLEX_SCALARS - opt_float = mxGetPr(prhs); + opt_float = mxGetPr(prhs); double* opt_float_imag = mxGetPi(prhs); - //assuming user wants std::complex here... - if(M == 1 && N == 1) - { + // assuming user wants std::complex here... 
+ if (M == 1 && N == 1) { List->set(option_name, complex_t(*opt_float, *opt_float_imag)); - } - else if(M == 0 || N == 0) - { - List->set(option_name, (complex_t*) NULL); - } - else - { - if(mxIsSparse(prhs)) + } else if (M == 0 || N == 0) { + List->set(option_name, (complex_t*)NULL); + } else { + if (mxIsSparse(prhs)) List->set(option_name, loadDataFromMatlab>(prhs)); else List->set(option_name, loadDataFromMatlab>(prhs)); @@ -1261,30 +1104,21 @@ void parse_list_item(RCP List, char *option_name, const mxArray * std::cerr << "Error: cannot load argument \"" << option_name << "\" because complex is not instantiated in this build.\n"; throw std::invalid_argument("Complex not supported"); #endif - } - else - { + } else { opt_float = mxGetPr(prhs); - if(M == 1 && N == 1 && MMISINT(opt_float[0])) - { - List->set(option_name, (int) opt_float[0]); - } - else if(M == 1 && N == 1) - { + if (M == 1 && N == 1 && MMISINT(opt_float[0])) { + List->set(option_name, (int)opt_float[0]); + } else if (M == 1 && N == 1) { List->set(option_name, opt_float[0]); - } - else if(M == 0 || N == 0) - { - List->set(option_name, (double*) NULL); - } - else - { - if(mxIsSparse(prhs)) + } else if (M == 0 || N == 0) { + List->set(option_name, (double*)NULL); + } else { + if (mxIsSparse(prhs)) List->set(option_name, loadDataFromMatlab>(prhs)); else { - if(typeStr == "map") // data stored as Xpetra::Map type + if (typeStr == "map") // data stored as Xpetra::Map type List->set(option_name, loadDataFromMatlab>(prhs)); - else // data stored as MultiVector + else // data stored as MultiVector List->set(option_name, loadDataFromMatlab>(prhs)); } } @@ -1292,11 +1126,11 @@ void parse_list_item(RCP List, char *option_name, const mxArray * break; case mxLOGICAL_CLASS: // Bool - if(M == 1 && N == 1) + if (M == 1 && N == 1) List->set(option_name, mxIsLogicalScalarTrue(prhs)); else List->set(option_name, mxGetLogicals(prhs)); - //NTS: The else probably doesn't work. + // NTS: The else probably doesn't work. 
break; case mxINT8_CLASS: case mxUINT8_CLASS: @@ -1305,8 +1139,8 @@ void parse_list_item(RCP List, char *option_name, const mxArray * case mxINT32_CLASS: case mxUINT32_CLASS: // Integer - opt_int = (int*) mxGetData(prhs); - if(M == 1 && N == 1) + opt_int = (int*)mxGetData(prhs); + if (M == 1 && N == 1) List->set(option_name, *opt_int); #ifdef HAVE_MUELU_INTREPID2 else if (strcmp(option_name, "pcoarsen: element to node map") == 0) @@ -1321,11 +1155,10 @@ void parse_list_item(RCP List, char *option_name, const mxArray * case mxCELL_CLASS: // Interpret a cell list as a nested teuchos list. // NTS: Assuming that it's a 1D row ordered array - for(i = 0; i < N; i += 2) - { + for (i = 0; i < N; i += 2) { cell1 = mxGetCell(prhs, i); cell2 = mxGetCell(prhs, i + 1); - if(!mxIsChar(cell1)) + if (!mxIsChar(cell1)) mexErrMsgTxt("Error: Input options are not in ['parameter',value] format!\n"); opt_char = mxArrayToString(cell1); parse_list_item(sublist, opt_char, cell2); @@ -1338,29 +1171,20 @@ void parse_list_item(RCP List, char *option_name, const mxArray * case mxFUNCTION_CLASS: case mxUNKNOWN_CLASS: case mxSTRUCT_CLASS: - //Currently Graph and Aggregates are stored as structures - if(isValidMatlabAggregates(prhs)) - { - try - { + // Currently Graph and Aggregates are stored as structures + if (isValidMatlabAggregates(prhs)) { + try { List->set(option_name, loadDataFromMatlab>(prhs)); break; - } - catch(exception& e) - { + } catch (exception& e) { cout << e.what(); throw runtime_error("Parsing aggregates in parameter list failed."); } - } - else if(isValidMatlabGraph(prhs)) - { - try - { + } else if (isValidMatlabGraph(prhs)) { + try { List->set(option_name, loadDataFromMatlab>(prhs)); break; - } - catch(exception& e) - { + } catch (exception& e) { cout << e.what(); throw runtime_error("Parsing graph in parameter list failed."); } @@ -1368,7 +1192,7 @@ void parse_list_item(RCP List, char *option_name, const mxArray * default: mexPrintf("Error parsing input option: %s 
[type=%d]\n", option_name, cid); mexErrMsgTxt("Error: An input option is invalid!\n"); - }; + }; } /**************************************************************/ @@ -1381,13 +1205,11 @@ void parse_list_item(RCP List, char *option_name, const mxArray * prhs - The problem inputs [I] Return value: Teuchos list containing all parameters passed in by the user. */ -RCP build_teuchos_list(int nrhs, const mxArray *prhs[]) -{ +RCP build_teuchos_list(int nrhs, const mxArray* prhs[]) { RCP TPL = rcp(new ParameterList); char* option_name; - for(int i = 0; i < nrhs; i += 2) - { - if(i == nrhs - 1 || !mxIsChar(prhs[i])) + for (int i = 0; i < nrhs; i += 2) { + if (i == nrhs - 1 || !mxIsChar(prhs[i])) mexErrMsgTxt("Error: Input options are not in ['parameter',value] format!\n"); /* What option are we setting? */ option_name = mxArrayToString(prhs[i]); @@ -1401,22 +1223,20 @@ RCP build_teuchos_list(int nrhs, const mxArray *prhs[]) } /*end build_teuchos_list*/ -} //end MueLu namespace (mexFunction must be in global namespace) +} // namespace MueLu -using namespace MueLu; //...but give mexFunction access to all MueLu members defined above +using namespace MueLu; //...but give mexFunction access to all MueLu members defined above -void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) -{ - //Lazily initialize Tpetra - if(!Tpetra::isInitialized()) - { - int argc = 0; +void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { + // Lazily initialize Tpetra + if (!Tpetra::isInitialized()) { + int argc = 0; char** argv = NULL; Tpetra::initialize(&argc, &argv); } double* id; int rv; - //Arrays representing vectors + // Arrays representing vectors string intf; RCP List; MODE_TYPE mode; @@ -1428,207 +1248,173 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) #ifdef FORCE_FACTORIES_TO_COMPILE { // debug - MueLu::TwoLevelMatlabFactory f1; - MueLu::SingleLevelMatlabFactory f2; + MueLu::TwoLevelMatlabFactory f1; + 
MueLu::SingleLevelMatlabFactory f2; } #endif - switch(mode) - { - case MODE_SETUP: - { - try - { - double oc = 0; + switch (mode) { + case MODE_SETUP: { + try { + double oc = 0; bool haveCoords = false; - if(nrhs == 2) + if (nrhs == 2) List = rcp(new ParameterList); - else if(nrhs == 3) - { - if(!mxIsNumeric(prhs[2]) || mxIsSparse(prhs[2]) || mxIsComplex(prhs[2])) + else if (nrhs == 3) { + if (!mxIsNumeric(prhs[2]) || mxIsSparse(prhs[2]) || mxIsComplex(prhs[2])) throw runtime_error("Expected real-valued, dense Coordinates array as the third muelu argument"); - else - { + else { haveCoords = true; - List = rcp(new ParameterList); + List = rcp(new ParameterList); } - } - else - { - if(mxIsNumeric(prhs[2]) && !mxIsSparse(prhs[2]) && !mxIsComplex(prhs[2])) - { - List = build_teuchos_list(nrhs - 3, &(prhs[3])); + } else { + if (mxIsNumeric(prhs[2]) && !mxIsSparse(prhs[2]) && !mxIsComplex(prhs[2])) { + List = build_teuchos_list(nrhs - 3, &(prhs[3])); haveCoords = true; - } - else - { - //assume that the parameters start at third argument if it doesn't seem like coords are there + } else { + // assume that the parameters start at third argument if it doesn't seem like coords are there List = build_teuchos_list(nrhs - 2, &(prhs[2])); } } - //Combine xml and easy parameter lists - if(List->isParameter("xml parameter file")) - { + // Combine xml and easy parameter lists + if (List->isParameter("xml parameter file")) { RCP xmlParams = Teuchos::getParametersFromXmlFile(List->get("xml parameter file")); List->remove("xml parameter file"); List->setParametersNotAlreadySet(*xmlParams); } - if(mxIsComplex(prhs[1])) - { - //Abort if input is complex but complex isn't supported + if (mxIsComplex(prhs[1])) { + // Abort if input is complex but complex isn't supported #ifndef HAVE_COMPLEX_SCALARS mexPrintf("Error: Complex scalars unsupported by this build of Trilinos.\n"); throw runtime_error("Complex scalars not supported."); #endif } intf = List->get(MUEMEX_INTERFACE, "tpetra"); - 
List->remove(MUEMEX_INTERFACE); //no longer need this parameter - if(intf == "epetra") - { - if(mxIsComplex(prhs[1])) - { + List->remove(MUEMEX_INTERFACE); // no longer need this parameter + if (intf == "epetra") { + if (mxIsComplex(prhs[1])) { mexPrintf("Error: Attempting to use complex-valued matrix with Epetra, which is unsupported.\n"); mexPrintf("Use Tpetra with complex matrices instead.\n"); throw runtime_error("Tried to use complex matrix with Epetra"); } RCP dp = rcp(new EpetraSystem()); - dp->List = List; - dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*) NULL); + dp->List = List; + dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*)NULL); oc = dp->operatorComplexity; - D = rcp_implicit_cast(dp); - } - else if(intf == "tpetra") - { - //infer scalar type from prhs (can be double or complex) - if(mxIsComplex(prhs[1])) - { + D = rcp_implicit_cast(dp); + } else if (intf == "tpetra") { + // infer scalar type from prhs (can be double or complex) + if (mxIsComplex(prhs[1])) { #ifdef HAVE_COMPLEX_SCALARS RCP> dp = rcp(new TpetraSystem()); - dp->List = List; - dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*) NULL); + dp->List = List; + dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*)NULL); oc = dp->operatorComplexity; - D = rcp_implicit_cast(dp); + D = rcp_implicit_cast(dp); #else throw runtime_error("Complex scalars not supported."); #endif - } - else - { + } else { RCP> dp = rcp(new TpetraSystem()); - dp->List = List; - dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*) NULL); + dp->List = List; + dp->setup(prhs[1], haveCoords, haveCoords ? 
prhs[2] : (mxArray*)NULL); oc = dp->operatorComplexity; - D = rcp_implicit_cast(dp); + D = rcp_implicit_cast(dp); } } rv = MuemexSystemList::add(D); mexPrintf("Set up problem #%d\n", rv); - if(nlhs > 0) - { - plhs[0] = mxCreateNumericMatrix(1, 1, mxINT32_CLASS, mxREAL); - *((int*) mxGetData(plhs[0])) = rv; - //output OC also - if(nlhs > 1) + if (nlhs > 0) { + plhs[0] = mxCreateNumericMatrix(1, 1, mxINT32_CLASS, mxREAL); + *((int*)mxGetData(plhs[0])) = rv; + // output OC also + if (nlhs > 1) plhs[1] = mxCreateDoubleScalar(oc); } mexLock(); - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("An error occurred during setup routine:\n"); cout << e.what() << endl; - if(nlhs > 0) - { - plhs[0] = mxCreateNumericMatrix(1, 1, mxINT32_CLASS, mxREAL); - *((int*) mxGetData(plhs[0])) = -1; + if (nlhs > 0) { + plhs[0] = mxCreateNumericMatrix(1, 1, mxINT32_CLASS, mxREAL); + *((int*)mxGetData(plhs[0])) = -1; } - if(nlhs > 1) + if (nlhs > 1) plhs[1] = mxCreateDoubleScalar(0); } break; } - case MODE_SOLVE: - { + case MODE_SOLVE: { int iters; - try - { + try { bool reuse; - //MODE_SOLVE, probID, matrix, bVec, params OR - //MODE_SOLVE, probID, bVec, params - //prhs[0] holds the MODE_SOLVE enum value - //if reusing, either nrhs == 3 or prhs[3] is not a string + // MODE_SOLVE, probID, matrix, bVec, params OR + // MODE_SOLVE, probID, bVec, params + // prhs[0] holds the MODE_SOLVE enum value + // if reusing, either nrhs == 3 or prhs[3] is not a string //(because bVec can't be a string) - if(MuemexSystemList::size() == 0) + if (MuemexSystemList::size() == 0) throw runtime_error("No linear systems are set up."); - if(nrhs == 3 || (nrhs > 3 && mxGetClassID(prhs[3]) == mxCHAR_CLASS)) - { - //No matrix supplied as argument, use one from setup - if(nrhs > 3) + if (nrhs == 3 || (nrhs > 3 && mxGetClassID(prhs[3]) == mxCHAR_CLASS)) { + // No matrix supplied as argument, use one from setup + if (nrhs > 3) List = build_teuchos_list(nrhs - 3, &prhs[3]); else List = rcp(new 
ParameterList); reuse = true; - } - else - { - if(nrhs > 4) + } else { + if (nrhs > 4) List = build_teuchos_list(nrhs - 4, &prhs[4]); else List = rcp(new ParameterList); reuse = false; } - if(List->isType("Output Style")) - { + if (List->isType("Output Style")) { int type = strToOutputStyle(List->get("Output Style", "Belos::Brief").c_str()); List->remove("Output Style"); - //Reset the ParameterList entry to be of type int instead of string + // Reset the ParameterList entry to be of type int instead of string List->set("Output Style", type); } - //Convert Belos msg type string to int in List - //Note: if the parameter value is already an int, don't touch it. - if(List->isType("Verbosity")) - { - //Errors + Warnings is already the default Belos verbosity setting + // Convert Belos msg type string to int in List + // Note: if the parameter value is already an int, don't touch it. + if (List->isType("Verbosity")) { + // Errors + Warnings is already the default Belos verbosity setting int verb = getBelosVerbosity(List->get("Verbosity", "Belos::Errors + Belos::Warnings").c_str()); List->remove("Verbosity"); List->set("Verbosity", verb); } - int probID = loadDataFromMatlab(prhs[1]); + int probID = loadDataFromMatlab(prhs[1]); RCP dp = MuemexSystemList::find(probID); - if(dp.is_null()) + if (dp.is_null()) throw runtime_error("Problem handle not allocated."); - //get pointer to MATLAB array that will be "B" or "rhs" multivector + // get pointer to MATLAB array that will be "B" or "rhs" multivector const mxArray* rhs = reuse ? 
prhs[2] : prhs[3]; - switch(dp->type) - { - case EPETRA: - { + switch (dp->type) { + case EPETRA: { RCP esys = rcp_static_cast(dp); RCP matrix; - if(reuse) + if (reuse) matrix = esys->GetMatrix(); else matrix = loadDataFromMatlab>(prhs[2]); plhs[0] = esys->solve(List, matrix, rhs, iters); break; } - case TPETRA: - { + case TPETRA: { RCP> tsys = rcp_static_cast, MuemexSystem>(dp); RCP matrix; - if(reuse) + if (reuse) matrix = tsys->GetMatrix(); else matrix = loadDataFromMatlab>(prhs[2]); plhs[0] = tsys->solve(List, matrix, rhs, iters); break; } - case TPETRA_COMPLEX: - { + case TPETRA_COMPLEX: { #ifdef HAVE_COMPLEX_SCALARS RCP> tsys = rcp_static_cast, MuemexSystem>(dp); RCP matrix; - if(reuse) + if (reuse) matrix = tsys->GetMatrix(); else matrix = loadDataFromMatlab>(prhs[2]); @@ -1640,49 +1426,41 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) #endif } } - if(nlhs > 1) + if (nlhs > 1) plhs[1] = saveDataToMatlab(iters); - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("An error occurred during the solve routine:\n"); cout << e.what() << endl; } break; } - case MODE_APPLY: - { - try - { - //MODE_APPLY, probID, rhsVec - //prhs[0] holds the MODE_APPLY enum value - if(MuemexSystemList::size() == 0) + case MODE_APPLY: { + try { + // MODE_APPLY, probID, rhsVec + // prhs[0] holds the MODE_APPLY enum value + if (MuemexSystemList::size() == 0) throw runtime_error("No linear systems are set up."); - int probID = loadDataFromMatlab(prhs[1]); + int probID = loadDataFromMatlab(prhs[1]); RCP dp = MuemexSystemList::find(probID); - if(dp.is_null()) + if (dp.is_null()) throw runtime_error("Problem handle not allocated."); - //get pointer to MATLAB array that will be "B" or "rhs" multivector + // get pointer to MATLAB array that will be "B" or "rhs" multivector const mxArray* rhs = prhs[2]; - switch(dp->type) - { - case EPETRA: - { + switch (dp->type) { + case EPETRA: { RCP esys = rcp_static_cast(dp); - plhs[0] = esys->apply(rhs); + 
plhs[0] = esys->apply(rhs); break; } - case TPETRA: - { + case TPETRA: { RCP> tsys = rcp_static_cast, MuemexSystem>(dp); - plhs[0] = tsys->apply(rhs); + plhs[0] = tsys->apply(rhs); break; } - case TPETRA_COMPLEX: - { + case TPETRA_COMPLEX: { #ifdef HAVE_COMPLEX_SCALARS RCP> tsys = rcp_static_cast, MuemexSystem>(dp); - plhs[0] = tsys->apply(rhs); + plhs[0] = tsys->apply(rhs); break; #else std::cerr << "Cannot solve complex-valued system because complex is not enabled in this build.\n"; @@ -1690,122 +1468,99 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) #endif } } - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("An error occurred during the apply routine:\n"); cout << e.what() << endl; } break; } - case MODE_CLEANUP: - { - try - { + case MODE_CLEANUP: { + try { mexPrintf("MueMex in cleanup mode.\n"); - if(MuemexSystemList::size() > 0 && nrhs == 1) - { + if (MuemexSystemList::size() > 0 && nrhs == 1) { /* Cleanup all problems */ - for(int i = 0; i < MuemexSystemList::size(); i++) + for (int i = 0; i < MuemexSystemList::size(); i++) mexUnlock(); MuemexSystemList::clearAll(); rv = 1; - } - else if(MuemexSystemList::size() > 0 && nrhs == 2) - { + } else if (MuemexSystemList::size() > 0 && nrhs == 2) { /* Cleanup one problem */ int probID = loadDataFromMatlab(prhs[1]); mexPrintf("Cleaning up problem #%d\n", probID); rv = MuemexSystemList::remove(probID); - if(rv) + if (rv) mexUnlock(); } /*end elseif*/ - else - { + else { rv = 0; } /* Set return value */ plhs[0] = mxCreateNumericMatrix(1, 1, mxINT32_CLASS, mxREAL); - id = (double*) mxGetData(plhs[0]); - *id = double(rv); - } - catch(exception& e) - { + id = (double*)mxGetData(plhs[0]); + *id = double(rv); + } catch (exception& e) { mexPrintf("An error occurred during the cleanup routine:\n"); cout << e.what() << endl; - if(nlhs > 0) - { - plhs[0] = mxCreateNumericMatrix(1,1, mxINT32_CLASS, mxREAL); - *(int*) mxGetData(plhs[0]) = 0; + if (nlhs > 0) { + plhs[0] = 
mxCreateNumericMatrix(1, 1, mxINT32_CLASS, mxREAL); + *(int*)mxGetData(plhs[0]) = 0; } } break; } - case MODE_STATUS: - { - try - { - //mexPrintf("MueMex in status checking mode.\n"); - if(MuemexSystemList::size() > 0 && nrhs == 1) - { + case MODE_STATUS: { + try { + // mexPrintf("MueMex in status checking mode.\n"); + if (MuemexSystemList::size() > 0 && nrhs == 1) { /* Status check on all problems */ rv = MuemexSystemList::status_all(); - }/*end if*/ - else if(MuemexSystemList::size() > 0 && nrhs == 2) - { + } /*end if*/ + else if (MuemexSystemList::size() > 0 && nrhs == 2) { /* Status check one problem */ int probID = loadDataFromMatlab(prhs[1]); - D = MuemexSystemList::find(probID); - if(D.is_null()) + D = MuemexSystemList::find(probID); + if (D.is_null()) throw runtime_error("Error: Problem handle not allocated.\n"); rv = D->status(); - }/*end elseif*/ + } /*end elseif*/ else mexPrintf("No problems set up.\n"); - if(nlhs > 0) - { + if (nlhs > 0) { int outVal = 0; - plhs[0] = saveDataToMatlab(outVal); + plhs[0] = saveDataToMatlab(outVal); } - } - catch(exception& e) - { + } catch (exception& e) { mexPrintf("An error occurred during the status routine:\n"); cout << e.what() << endl; int outVal = -1; - if(nlhs > 0) + if (nlhs > 0) plhs[0] = saveDataToMatlab(outVal); } break; } - case MODE_GET: - { - try - { - int probID = loadDataFromMatlab(prhs[1]); - int levelID = loadDataFromMatlab(prhs[2]); - char* dataName = mxArrayToString(prhs[3]); + case MODE_GET: { + try { + int probID = loadDataFromMatlab(prhs[1]); + int levelID = loadDataFromMatlab(prhs[2]); + char* dataName = mxArrayToString(prhs[3]); MuemexType outputType = INT; - RCP dp = MuemexSystemList::find(probID); - if(dp.is_null()) - { + RCP dp = MuemexSystemList::find(probID); + if (dp.is_null()) { throw runtime_error("Problem handle not allocated."); } - //See if typeHint was given + // See if typeHint was given char* paramTypeName = NULL; - if(nrhs > 4) { + if (nrhs > 4) { paramTypeName = 
mxArrayToString(prhs[4]); mexPrintf("paramTypeName %s", paramTypeName); } bool complexFlag = dp->type == TPETRA_COMPLEX; - //std::cout << "before strToDataType dataName=" << dataName << " paramTypeName " << paramTypeName << std::endl; + // std::cout << "before strToDataType dataName=" << dataName << " paramTypeName " << paramTypeName << std::endl; outputType = strToDataType(dataName, paramTypeName, complexFlag); - plhs[0] = dp->getHierarchyData(string(dataName), outputType, levelID); - } - catch(exception& e) - { + plhs[0] = dp->getHierarchyData(string(dataName), outputType, levelID); + } catch (exception& e) { mexPrintf("An error occurred during the get routine:\n"); cout << e.what() << endl; plhs[0] = mxCreateDoubleScalar(0); @@ -1818,5 +1573,5 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) case MODE_AGGREGATE: default: mexPrintf("Mode not supported yet."); - } + } } diff --git a/packages/muelu/matlab/src/MueLu_MatlabSmoother.cpp b/packages/muelu/matlab/src/MueLu_MatlabSmoother.cpp index e6490ac3fadb..efaec72740ab 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabSmoother.cpp +++ b/packages/muelu/matlab/src/MueLu_MatlabSmoother.cpp @@ -44,7 +44,6 @@ // // @HEADER - #include "MueLu_ExplicitInstantiation.hpp" #include "MueLu_MatlabSmoother_def.hpp" @@ -52,8 +51,8 @@ #include "TpetraCore_ETIHelperMacros.h" #ifdef HAVE_MUELU_MATLAB -#define MUELU_LOCAL_INSTANT(S,LO,GO,N) \ - template class MueLu::MatlabSmoother; +#define MUELU_LOCAL_INSTANT(S, LO, GO, N) \ + template class MueLu::MatlabSmoother; TPETRA_ETI_MANGLING_TYPEDEFS() diff --git a/packages/muelu/matlab/src/MueLu_MatlabSmoother_decl.hpp b/packages/muelu/matlab/src/MueLu_MatlabSmoother_decl.hpp index b31b7823d731..6dd3d6b79c30 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabSmoother_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabSmoother_decl.hpp @@ -58,155 +58,150 @@ namespace MueLu { - /*! 
- @class MatlabSmoother - @ingroup MueMexClasses - @brief Class that encapsulates Matlab smoothers. - - // This class creates an Matlab preconditioner factory. The factory creates a smoother based on the - // type and ParameterList passed into the constructor. See the constructor for more information. - */ - - template ::scalar_type, - class LocalOrdinal = typename SmootherPrototype::local_ordinal_type, - class GlobalOrdinal = typename SmootherPrototype::global_ordinal_type, - class Node = typename SmootherPrototype::node_type> - class MatlabSmoother : public SmootherPrototype - { +/*! + @class MatlabSmoother + @ingroup MueMexClasses + @brief Class that encapsulates Matlab smoothers. + + // This class creates an Matlab preconditioner factory. The factory creates a smoother based on the + // type and ParameterList passed into the constructor. See the constructor for more information. + */ + +template ::scalar_type, + class LocalOrdinal = typename SmootherPrototype::local_ordinal_type, + class GlobalOrdinal = typename SmootherPrototype::global_ordinal_type, + class Node = typename SmootherPrototype::node_type> +class MatlabSmoother : public SmootherPrototype { #undef MUELU_MATLABSMOOTHER_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors / destructors + //@{ + // TODO: update doc for Matlab. + /*! @brief Constructor - //! @name Constructors / destructors - //@{ - //TODO: update doc for Matlab. - /*! @brief Constructor + ADD DOCUMENTATION HERE - ADD DOCUMENTATION HERE - - */ + */ #ifndef _MSC_VER - // Avoid error C3772: invalid friend template declaration - template - friend class MatlabSmoother; + // Avoid error C3772: invalid friend template declaration + template + friend class MatlabSmoother; #endif - MatlabSmoother(const Teuchos::ParameterList& paramList = Teuchos::ParameterList()); + MatlabSmoother(const Teuchos::ParameterList& paramList = Teuchos::ParameterList()); - //! Destructor - virtual ~MatlabSmoother() { } + //! 
Destructor + virtual ~MatlabSmoother() {} - //@} + //@} - void SetParameterList(const Teuchos::ParameterList& paramList); + void SetParameterList(const Teuchos::ParameterList& paramList); - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Computational methods. - //@{ + //! @name Computational methods. + //@{ - /*! @brief Set up the smoother. + /*! @brief Set up the smoother. - This creates the underlying Matlab smoother object, copies any parameter list options - supplied to the constructor to the Matlab object, and computes the preconditioner. + This creates the underlying Matlab smoother object, copies any parameter list options + supplied to the constructor to the Matlab object, and computes the preconditioner. - TODO The eigenvalue estimate should come from A_, not the Matlab parameter list. - */ - void Setup(Level ¤tLevel); + TODO The eigenvalue estimate should come from A_, not the Matlab parameter list. + */ + void Setup(Level& currentLevel); - /*! @brief Apply the preconditioner. + /*! @brief Apply the preconditioner. - Solves the linear system AX=B using the constructed smoother. + Solves the linear system AX=B using the constructed smoother. - @param X initial guess - @param B right-hand side - @param InitialGuessIsZero (optional) If false, some work can be avoided. Whether this actually saves any work depends on the underlying Matlab implementation. - */ - void Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero = false) const; + @param X initial guess + @param B right-hand side + @param InitialGuessIsZero (optional) If false, some work can be avoided. Whether this actually saves any work depends on the underlying Matlab implementation. + */ + void Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero = false) const; - //@} + //@} - //! @name Utilities - //@{ + //! 
@name Utilities + //@{ - RCP Copy() const; + RCP Copy() const; - //@} + //@} - //! Clone the smoother to a different node type - template - RCP > - clone(const RCP& node2, const Teuchos::RCP >& A_newnode) const; + //! Clone the smoother to a different node type + template + RCP > + clone(const RCP& node2, const Teuchos::RCP >& A_newnode) const; - //! @name Overridden from Teuchos::Describable - //@{ + //! @name Overridden from Teuchos::Describable + //@{ - //! Return a simple one-line description of this object. - std::string description() const; + //! Return a simple one-line description of this object. + std::string description() const; - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; - size_t getNodeSmootherComplexity() const {return Teuchos::OrdinalTraits::invalid();} + size_t getNodeSmootherComplexity() const { return Teuchos::OrdinalTraits::invalid(); } + //@} - //@} + private: + //! List of arguments to the MATLAB setup function besides "A", in order + mutable std::string needsSetup_; - private: + //! Amount of solve data (besides A, LHS & RHS) + size_t solveDataSize_; - //! List of arguments to the MATLAB setup function besides "A", in order - mutable std::string needsSetup_; + //! List of data generated by setup which will be sent to solve after "A", "LHS" and "RHS" + std::vector > solveData_; - //! Amount of solve data (besides A, LHS & RHS) - size_t solveDataSize_; + //! 
Matlab setup function + std::string setupFunction_; - //! List of data generated by setup which will be sent to solve after "A", "LHS" and "RHS" - std::vector > solveData_; - - //! Matlab setup function - std::string setupFunction_; + //! Matlab solve function + std::string solveFunction_; - //! Matlab solve function - std::string solveFunction_; + //! Matrix, (maybe) used in apply + mutable RCP A_; - //! Matrix, (maybe) used in apply - mutable RCP A_; - - }; // class MatlabSmoother +}; // class MatlabSmoother - template - template - RCP > - MatlabSmoother::clone(const RCP& node2, const RCP >& A_newnode) const { - const ParameterList& paramList = this->GetParameterList(); +template +template +RCP > +MatlabSmoother::clone(const RCP& node2, const RCP >& A_newnode) const { + const ParameterList& paramList = this->GetParameterList(); - RCP > cloneSmoother = + RCP > cloneSmoother = rcp(new MatlabSmoother(paramList)); - cloneSmoother->needsSetup_ = needsSetup_; - cloneSmoother->setupFunction_ = setupFunction_; - cloneSmoother->solveFunction_ = solveFunction_; - cloneSmoother->A_ = A_; - - for(size_t i=0; i< solveData_.size(); i++) - cloneSmoother->solveData_->push_back(solveData_[i]); - cloneSmoother->SetParameterList(paramList); - cloneSmoother->IsSetup(this->IsSetup()); - return cloneSmoother; - } + cloneSmoother->needsSetup_ = needsSetup_; + cloneSmoother->setupFunction_ = setupFunction_; + cloneSmoother->solveFunction_ = solveFunction_; + cloneSmoother->A_ = A_; + for (size_t i = 0; i < solveData_.size(); i++) + cloneSmoother->solveData_->push_back(solveData_[i]); + cloneSmoother->SetParameterList(paramList); + cloneSmoother->IsSetup(this->IsSetup()); + return cloneSmoother; +} -} // namespace MueLu +} // namespace MueLu #define MUELU_MATLABSMOOTHER_SHORT -#endif // HAVE_MUELU_MATLAB -#endif // MUELU_MATLABSMOOTHER_DECL_HPP +#endif // HAVE_MUELU_MATLAB +#endif // MUELU_MATLABSMOOTHER_DECL_HPP diff --git a/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp 
b/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp index 0e5a0cbf8fa1..d5d99a41891d 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp @@ -51,125 +51,116 @@ #if defined(HAVE_MUELU_MATLAB) #include "MueLu_Monitor.hpp" - namespace MueLu { - template - MatlabSmoother::MatlabSmoother(const Teuchos::ParameterList& paramList) - { - SetParameterList(paramList); - } +template +MatlabSmoother::MatlabSmoother(const Teuchos::ParameterList& paramList) { + SetParameterList(paramList); +} - template - void MatlabSmoother::SetParameterList(const Teuchos::ParameterList& paramList) - { - Factory::SetParameterList(paramList); - ParameterList& pL = const_cast(this->GetParameterList()); - setupFunction_ = pL.get("Setup Function",""); - solveFunction_ = pL.get("Solve Function",""); - solveDataSize_ = pL.get("Number of Solver Args", 0); - } +template +void MatlabSmoother::SetParameterList(const Teuchos::ParameterList& paramList) { + Factory::SetParameterList(paramList); + ParameterList& pL = const_cast(this->GetParameterList()); + setupFunction_ = pL.get("Setup Function", ""); + solveFunction_ = pL.get("Solve Function", ""); + solveDataSize_ = pL.get("Number of Solver Args", 0); +} - template - void MatlabSmoother::DeclareInput(Level ¤tLevel) const - { - using namespace std; - this->Input(currentLevel, "A"); - ParameterList& pL = const_cast(this->GetParameterList()); - needsSetup_ = pL.get("Needs"); - vector needsList = tokenizeList(needsSetup_); - for(size_t i = 0; i < needsList.size(); i++) - { - if(!IsParamMuemexVariable(needsList[i]) && needsList[i] != "Level") - this->Input(currentLevel, needsList[i]); - } +template +void MatlabSmoother::DeclareInput(Level& currentLevel) const { + using namespace std; + this->Input(currentLevel, "A"); + ParameterList& pL = const_cast(this->GetParameterList()); + needsSetup_ = pL.get("Needs"); + vector needsList = tokenizeList(needsSetup_); + for (size_t i = 0; i < 
needsList.size(); i++) { + if (!IsParamMuemexVariable(needsList[i]) && needsList[i] != "Level") + this->Input(currentLevel, needsList[i]); } +} - template - void MatlabSmoother::Setup(Level& currentLevel) - { - using namespace std; - FactoryMonitor m(*this, "Setup Smoother", currentLevel); - if (this->IsSetup() == true) - this->GetOStream(Warnings0) << "MueLu::MatlabSmoother::Setup(): Setup() has already been called"; - vector> InputArgs = processNeeds(this, needsSetup_, currentLevel); - A_ = Factory::Get>(currentLevel, "A"); - RCP AmatArg = rcp_implicit_cast(rcp(new MuemexData>(A_))); - //Always add A to the beginning of InputArgs - InputArgs.insert(InputArgs.begin(), AmatArg); - // Call mex function - if(!setupFunction_.length()) - throw runtime_error("Invalid matlab function name"); - solveData_= callMatlab(setupFunction_, solveDataSize_, InputArgs); - this->GetOStream(Statistics1) << description() << endl; - this->IsSetup(true); //mark the smoother as set up - } +template +void MatlabSmoother::Setup(Level& currentLevel) { + using namespace std; + FactoryMonitor m(*this, "Setup Smoother", currentLevel); + if (this->IsSetup() == true) + this->GetOStream(Warnings0) << "MueLu::MatlabSmoother::Setup(): Setup() has already been called"; + vector> InputArgs = processNeeds(this, needsSetup_, currentLevel); + A_ = Factory::Get>(currentLevel, "A"); + RCP AmatArg = rcp_implicit_cast(rcp(new MuemexData>(A_))); + // Always add A to the beginning of InputArgs + InputArgs.insert(InputArgs.begin(), AmatArg); + // Call mex function + if (!setupFunction_.length()) + throw runtime_error("Invalid matlab function name"); + solveData_ = callMatlab(setupFunction_, solveDataSize_, InputArgs); + this->GetOStream(Statistics1) << description() << endl; + this->IsSetup(true); // mark the smoother as set up +} - template - void MatlabSmoother::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const - { - TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, 
Exceptions::RuntimeError, - "MueLu::MatlabSmoother::Apply(): Setup() has not been called"); - using namespace Teuchos; - using namespace std; - if(InitialGuessIsZero) - X.putScalar(0.0); - // Push on A as first input - vector> InputArgs; - InputArgs.push_back(rcp(new MuemexData>(A_))); - // Push on LHS & RHS - RCP Xrcp(&X, false); - MultiVector* BPtrNonConst = (MultiVector*) &B; - RCP Brcp = rcp(BPtrNonConst, false); - RCP>> XData = rcp(new MuemexData>(Xrcp)); - RCP>> BData = rcp(new MuemexData>(Brcp)); - InputArgs.push_back(XData); - InputArgs.push_back(BData); - for(size_t i = 0; i < solveData_.size(); i++) - InputArgs.push_back(solveData_[i]); - if(!solveFunction_.length()) throw std::runtime_error("Invalid matlab function name"); - vector> mexOutput = callMatlab(solveFunction_, 1, InputArgs); - RCP>> mydata = Teuchos::rcp_static_cast>>(mexOutput[0]); - X = *(mydata->getData()); - } +template +void MatlabSmoother::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const { + TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, + "MueLu::MatlabSmoother::Apply(): Setup() has not been called"); + using namespace Teuchos; + using namespace std; + if (InitialGuessIsZero) + X.putScalar(0.0); + // Push on A as first input + vector> InputArgs; + InputArgs.push_back(rcp(new MuemexData>(A_))); + // Push on LHS & RHS + RCP Xrcp(&X, false); + MultiVector* BPtrNonConst = (MultiVector*)&B; + RCP Brcp = rcp(BPtrNonConst, false); + RCP>> XData = rcp(new MuemexData>(Xrcp)); + RCP>> BData = rcp(new MuemexData>(Brcp)); + InputArgs.push_back(XData); + InputArgs.push_back(BData); + for (size_t i = 0; i < solveData_.size(); i++) + InputArgs.push_back(solveData_[i]); + if (!solveFunction_.length()) throw std::runtime_error("Invalid matlab function name"); + vector> mexOutput = callMatlab(solveFunction_, 1, InputArgs); + RCP>> mydata = Teuchos::rcp_static_cast>>(mexOutput[0]); + X = *(mydata->getData()); +} - template - RCP> 
MatlabSmoother::Copy() const - { - RCP smoother = rcp(new MatlabSmoother(*this) ); - smoother->SetParameterList(this->GetParameterList()); - return smoother; - } +template +RCP> MatlabSmoother::Copy() const { + RCP smoother = rcp(new MatlabSmoother(*this)); + smoother->SetParameterList(this->GetParameterList()); + return smoother; +} - template - std::string MatlabSmoother::description() const { - std::ostringstream out; - if (SmootherPrototype::IsSetup()) { - out << "Matlab Smoother("< +std::string MatlabSmoother::description() const { + std::ostringstream out; + if (SmootherPrototype::IsSetup()) { + out << "Matlab Smoother(" << setupFunction_ << "/" << solveFunction_ << ")"; + } else { + out << SmootherPrototype::description(); } + return out.str(); +} - template - void MatlabSmoother::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Parameters0) - out << "Matlab Smoother("< +void MatlabSmoother::print(Teuchos::FancyOStream& out, const VerbLevel verbLevel) const { + MUELU_DESCRIBE; - if (verbLevel & Parameters1) { - out0 << "Parameter list: " << std::endl; - Teuchos::OSTab tab2(out); - out << this->GetParameterList(); - } + if (verbLevel & Parameters0) + out << "Matlab Smoother(" << setupFunction_ << "/" << solveFunction_ << ")"; - if (verbLevel & Debug) { - out0 << "IsSetup: " << Teuchos::toString(SmootherPrototype::IsSetup()) << std::endl; - } + if (verbLevel & Parameters1) { + out0 << "Parameter list: " << std::endl; + Teuchos::OSTab tab2(out); + out << this->GetParameterList(); } + if (verbLevel & Debug) { + out0 << "IsSetup: " << Teuchos::toString(SmootherPrototype::IsSetup()) << std::endl; + } +} // Dummy specializations for GO = long long /*template <> @@ -190,8 +181,7 @@ void MatlabSmoother,int,long long>::Apply(MultiVector& X, c throw std::runtime_error("MatlabSmoother does not support GlobalOrdinal == long long."); }*/ +} // namespace MueLu -} // namespace MueLu - -#endif // HAVE_MUELU_MATLAB 
-#endif // MUELU_MATLABSMOOTHER_DEF_HPP +#endif // HAVE_MUELU_MATLAB +#endif // MUELU_MATLABSMOOTHER_DEF_HPP diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp b/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp index 4824c411dd27..bbd4a2d38615 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp @@ -50,7 +50,7 @@ #else /* Stuff for MATLAB R2006b vs. previous versions */ -#if(defined(MX_API_VER) && MX_API_VER >= 0x07030000) +#if (defined(MX_API_VER) && MX_API_VER >= 0x07030000) #else typedef int mwIndex; #endif @@ -61,33 +61,32 @@ using namespace Teuchos; namespace MueLu { /* Explicit instantiation of MuemexData variants */ - template class MuemexData > >; - template class MuemexData > >; - template class MuemexData > >; - template class MuemexData > >; - template class MuemexData>; - template class MuemexData>; - template class MuemexData; - template class MuemexData; - template class MuemexData; - template class MuemexData; - template class MuemexData; - template class MuemexData > >; - template class MuemexData > >; - template class MuemexData >; - template class MuemexData > >; - template class MuemexData > >; - template class MuemexData>>; +template class MuemexData > >; +template class MuemexData > >; +template class MuemexData > >; +template class MuemexData > >; +template class MuemexData >; +template class MuemexData >; +template class MuemexData; +template class MuemexData; +template class MuemexData; +template class MuemexData; +template class MuemexData; +template class MuemexData > >; +template class MuemexData > >; +template class MuemexData >; +template class MuemexData > >; +template class MuemexData > >; +template class MuemexData > >; -//Flag set to true if MATLAB's CSC matrix index type is not int (usually false) +// Flag set to true if MATLAB's CSC matrix index type is not int (usually false) bool rewrap_ints = sizeof(int) != sizeof(mwIndex); -int* mwIndex_to_int(int N, 
mwIndex* mwi_array) -{ - //int* rv = (int*) malloc(N * sizeof(int)); - int* rv = new int[N]; // not really better but may avoid confusion for valgrind - for(int i = 0; i < N; i++) - rv[i] = (int) mwi_array[i]; +int* mwIndex_to_int(int N, mwIndex* mwi_array) { + // int* rv = (int*) malloc(N * sizeof(int)); + int* rv = new int[N]; // not really better but may avoid confusion for valgrind + for (int i = 0; i < N; i++) + rv[i] = (int)mwi_array[i]; return rv; } @@ -95,27 +94,26 @@ int* mwIndex_to_int(int N, mwIndex* mwi_array) /* Specializations */ /* ******************************* */ -template<> mxArray* createMatlabSparse(int numRows, int numCols, int nnz) -{ +template <> +mxArray* createMatlabSparse(int numRows, int numCols, int nnz) { return mxCreateSparse(numRows, numCols, nnz, mxREAL); } -template<> mxArray* createMatlabSparse(int numRows, int numCols, int nnz) -{ +template <> +mxArray* createMatlabSparse(int numRows, int numCols, int nnz) { return mxCreateSparse(numRows, numCols, nnz, mxCOMPLEX); } -template<> void fillMatlabArray(double* array, const mxArray* mxa, int n) -{ +template <> +void fillMatlabArray(double* array, const mxArray* mxa, int n) { memcpy(mxGetPr(mxa), array, n * sizeof(double)); } -template<> void fillMatlabArray(complex_t* array, const mxArray* mxa, int n) -{ +template <> +void fillMatlabArray(complex_t* array, const mxArray* mxa, int n) { double* pr = mxGetPr(mxa); double* pi = mxGetPi(mxa); - for(int i = 0; i < n; i++) - { + for (int i = 0; i < n; i++) { pr[i] = std::real(array[i]); pi[i] = std::imag(array[i]); } @@ -125,26 +123,21 @@ template<> void fillMatlabArray(complex_t* array, const mxArray* mxa, /* Callback Functions */ /******************************/ -void callMatlabNoArgs(std::string function) -{ +void callMatlabNoArgs(std::string function) { int result = mexEvalString(function.c_str()); - if(result != 0) - mexPrintf("An error occurred while running a MATLAB command.");\ + if (result != 0) + mexPrintf("An error occurred while 
running a MATLAB command."); } -std::vector> callMatlab(std::string function, int numOutputs, std::vector> args) -{ +std::vector > callMatlab(std::string function, int numOutputs, std::vector > args) { using Teuchos::rcp_static_cast; - mxArray** matlabArgs = new mxArray* [args.size()]; - mxArray** matlabOutput = new mxArray* [numOutputs]; - std::vector> output; + mxArray** matlabArgs = new mxArray*[args.size()]; + mxArray** matlabOutput = new mxArray*[numOutputs]; + std::vector > output; - for(int i = 0; i < int(args.size()); i++) - { - try - { - switch(args[i]->type) - { + for (int i = 0; i < int(args.size()); i++) { + try { + switch (args[i]->type) { case BOOL: matlabArgs[i] = rcp_static_cast, MuemexArg>(args[i])->convertToMatlab(); break; @@ -161,77 +154,71 @@ std::vector> callMatlab(std::string function, int numOutputs, std matlabArgs[i] = rcp_static_cast, MuemexArg>(args[i])->convertToMatlab(); break; case XPETRA_MAP: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case XPETRA_ORDINAL_VECTOR: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case TPETRA_MULTIVECTOR_DOUBLE: - matlabArgs[i] = rcp_static_cast>>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast > >, MuemexArg>(args[i])->convertToMatlab(); break; case TPETRA_MULTIVECTOR_COMPLEX: - matlabArgs[i] = rcp_static_cast>>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast > >, MuemexArg>(args[i])->convertToMatlab(); break; case TPETRA_MATRIX_DOUBLE: - matlabArgs[i] = rcp_static_cast>>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast > >, MuemexArg>(args[i])->convertToMatlab(); break; case TPETRA_MATRIX_COMPLEX: - matlabArgs[i] = rcp_static_cast>>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = 
rcp_static_cast > >, MuemexArg>(args[i])->convertToMatlab(); break; case XPETRA_MATRIX_DOUBLE: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case XPETRA_MATRIX_COMPLEX: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case XPETRA_MULTIVECTOR_DOUBLE: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case XPETRA_MULTIVECTOR_COMPLEX: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case EPETRA_CRSMATRIX: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case EPETRA_MULTIVECTOR: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case AGGREGATES: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case AMALGAMATION_INFO: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case GRAPH: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); #ifdef HAVE_MUELU_INTREPID2 case FIELDCONTAINER_ORDINAL: - matlabArgs[i] = rcp_static_cast>, MuemexArg>(args[i])->convertToMatlab(); + matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; #endif } - } - catch (std::exception& e) - { + } catch 
(std::exception& e) { mexPrintf("An error occurred while converting arg #%d to MATLAB:\n", i); std::cout << e.what() << std::endl; mexPrintf("Passing 0 instead.\n"); matlabArgs[i] = mxCreateDoubleScalar(0); } } - //now matlabArgs is populated with MATLAB data types + // now matlabArgs is populated with MATLAB data types int result = mexCallMATLAB(numOutputs, matlabOutput, args.size(), matlabArgs, function.c_str()); - if(result != 0) + if (result != 0) mexPrintf("Matlab encountered an error while running command through muemexCallbacks.\n"); - //now, if all went well, matlabOutput contains all the output to return to user - for(int i = 0; i < numOutputs; i++) - { - try - { + // now, if all went well, matlabOutput contains all the output to return to user + for (int i = 0; i < numOutputs; i++) { + try { output.push_back(convertMatlabVar(matlabOutput[i])); - } - catch(std::exception& e) - { + } catch (std::exception& e) { mexPrintf("An error occurred while converting output #%d from MATLAB:\n", i); std::cout << e.what() << std::endl; } @@ -245,90 +232,83 @@ std::vector> callMatlab(std::string function, int numOutputs, std /* More utility functions */ /******************************/ -template<> mxArray* createMatlabMultiVector(int numRows, int numCols) -{ +template <> +mxArray* createMatlabMultiVector(int numRows, int numCols) { return mxCreateDoubleMatrix(numRows, numCols, mxREAL); } -template<> mxArray* createMatlabMultiVector(int numRows, int numCols) -{ +template <> +mxArray* createMatlabMultiVector(int numRows, int numCols) { return mxCreateDoubleMatrix(numRows, numCols, mxCOMPLEX); } -mxArray* saveAmalInfo(RCP& amalInfo) -{ +mxArray* saveAmalInfo(RCP& amalInfo) { throw runtime_error("AmalgamationInfo not supported in MueMex yet."); return mxCreateDoubleScalar(0); } -bool isValidMatlabAggregates(const mxArray* mxa) -{ +bool isValidMatlabAggregates(const mxArray* mxa) { bool isValidAggregates = true; - if(!mxIsStruct(mxa)) + if (!mxIsStruct(mxa)) return false; - 
int numFields = mxGetNumberOfFields(mxa); //check that struct has correct # of fields - if(numFields != 5) + int numFields = mxGetNumberOfFields(mxa); // check that struct has correct # of fields + if (numFields != 5) isValidAggregates = false; - if(isValidAggregates) - { + if (isValidAggregates) { const char* mem1 = mxGetFieldNameByNumber(mxa, 0); - if(mem1 == NULL || strcmp(mem1, "nVertices") != 0) + if (mem1 == NULL || strcmp(mem1, "nVertices") != 0) isValidAggregates = false; const char* mem2 = mxGetFieldNameByNumber(mxa, 1); - if(mem2 == NULL || strcmp(mem2, "nAggregates") != 0) + if (mem2 == NULL || strcmp(mem2, "nAggregates") != 0) isValidAggregates = false; const char* mem3 = mxGetFieldNameByNumber(mxa, 2); - if(mem3 == NULL || strcmp(mem3, "vertexToAggID") != 0) + if (mem3 == NULL || strcmp(mem3, "vertexToAggID") != 0) isValidAggregates = false; const char* mem4 = mxGetFieldNameByNumber(mxa, 3); - if(mem3 == NULL || strcmp(mem4, "rootNodes") != 0) + if (mem3 == NULL || strcmp(mem4, "rootNodes") != 0) isValidAggregates = false; const char* mem5 = mxGetFieldNameByNumber(mxa, 4); - if(mem4 == NULL || strcmp(mem5, "aggSizes") != 0) + if (mem4 == NULL || strcmp(mem5, "aggSizes") != 0) isValidAggregates = false; } return isValidAggregates; } -bool isValidMatlabGraph(const mxArray* mxa) -{ +bool isValidMatlabGraph(const mxArray* mxa) { bool isValidGraph = true; - if(!mxIsStruct(mxa)) + if (!mxIsStruct(mxa)) return false; - int numFields = mxGetNumberOfFields(mxa); //check that struct has correct # of fields - if(numFields != 2) + int numFields = mxGetNumberOfFields(mxa); // check that struct has correct # of fields + if (numFields != 2) isValidGraph = false; - if(isValidGraph) - { + if (isValidGraph) { const char* mem1 = mxGetFieldNameByNumber(mxa, 0); - if(mem1 == NULL || strcmp(mem1, "edges") != 0) + if (mem1 == NULL || strcmp(mem1, "edges") != 0) isValidGraph = false; const char* mem2 = mxGetFieldNameByNumber(mxa, 1); - if(mem2 == NULL || strcmp(mem2, 
"boundaryNodes") != 0) - isValidGraph = false; + if (mem2 == NULL || strcmp(mem2, "boundaryNodes") != 0) + isValidGraph = false; } return isValidGraph; } -std::vector tokenizeList(const std::string& params) -{ +std::vector tokenizeList(const std::string& params) { using namespace std; vector rlist; const char* delims = ","; - char* copy = (char*) malloc(params.length() + 1); + char* copy = (char*)malloc(params.length() + 1); strcpy(copy, params.c_str()); - char* mark = (char*) strtok(copy, delims); - while(mark != NULL) - { - //Remove leading and trailing whitespace in token + char* mark = (char*)strtok(copy, delims); + while (mark != NULL) { + // Remove leading and trailing whitespace in token char* tail = mark + strlen(mark) - 1; - while(*mark == ' ') + while (*mark == ' ') mark++; - while(*tail == ' ' && tail > mark) + while (*tail == ' ' && tail > mark) tail--; tail++; *tail = 0; - string tok(mark); //copies the characters to string object + string tok(mark); // copies the characters to string object rlist.push_back(tok); mark = strtok(NULL, delims); } @@ -336,84 +316,76 @@ std::vector tokenizeList(const std::string& params) return rlist; } -Teuchos::RCP getInputParamList() -{ +Teuchos::RCP getInputParamList() { using namespace Teuchos; RCP validParamList = rcp(new ParameterList()); - validParamList->set>("A", Teuchos::null, "Factory for the matrix A."); - validParamList->set>("P", Teuchos::null, "Factory for the prolongator."); - validParamList->set>("R", Teuchos::null, "Factory for the restrictor."); - validParamList->set>("Ptent", Teuchos::null, "Factory for the tentative (unsmoothed) prolongator."); - validParamList->set>("Coordinates", Teuchos::null, "Factory for the node coordinates."); - validParamList->set>("Nullspace", Teuchos::null, "Factory for the nullspace."); - validParamList->set>("Aggregates", Teuchos::null, "Factory for the aggregates."); - validParamList->set>("UnamalgamationInfo", Teuchos::null, "Factory for amalgamation."); + 
validParamList->set >("A", Teuchos::null, "Factory for the matrix A."); + validParamList->set >("P", Teuchos::null, "Factory for the prolongator."); + validParamList->set >("R", Teuchos::null, "Factory for the restrictor."); + validParamList->set >("Ptent", Teuchos::null, "Factory for the tentative (unsmoothed) prolongator."); + validParamList->set >("Coordinates", Teuchos::null, "Factory for the node coordinates."); + validParamList->set >("Nullspace", Teuchos::null, "Factory for the nullspace."); + validParamList->set >("Aggregates", Teuchos::null, "Factory for the aggregates."); + validParamList->set >("UnamalgamationInfo", Teuchos::null, "Factory for amalgamation."); #ifdef HAVE_MUELU_INTREPID2 - validParamList->set>("pcoarsen: element to node map", Teuchos::null, "Generating factory of the element to node map"); + validParamList->set >("pcoarsen: element to node map", Teuchos::null, "Generating factory of the element to node map"); #endif return validParamList; } -Teuchos::RCP convertMatlabVar(const mxArray* mxa) -{ - switch(mxGetClassID(mxa)) - { +Teuchos::RCP convertMatlabVar(const mxArray* mxa) { + switch (mxGetClassID(mxa)) { case mxCHAR_CLASS: - //string + // string return rcp_implicit_cast(rcp(new MuemexData(mxa))); break; case mxLOGICAL_CLASS: - //boolean + // boolean return rcp_implicit_cast(rcp(new MuemexData(mxa))); break; case mxINT32_CLASS: - if(mxGetM(mxa) == 1 && mxGetN(mxa) == 1) - //individual integer + if (mxGetM(mxa) == 1 && mxGetN(mxa) == 1) + // individual integer return rcp_implicit_cast(rcp(new MuemexData(mxa))); - else if(mxGetM(mxa) != 1 || mxGetN(mxa) != 1) - //ordinal vector - return rcp_implicit_cast(rcp(new MuemexData>(mxa))); + else if (mxGetM(mxa) != 1 || mxGetN(mxa) != 1) + // ordinal vector + return rcp_implicit_cast(rcp(new MuemexData >(mxa))); else throw std::runtime_error("Error: Don't know what to do with integer array.\n"); break; case mxDOUBLE_CLASS: - if(mxGetM(mxa) == 1 && mxGetN(mxa) == 1) - { - if(mxIsComplex(mxa)) - 
//single double (scalar, real) + if (mxGetM(mxa) == 1 && mxGetN(mxa) == 1) { + if (mxIsComplex(mxa)) + // single double (scalar, real) return rcp_implicit_cast(rcp(new MuemexData(mxa))); else - //single complex scalar + // single complex scalar return rcp_implicit_cast(rcp(new MuemexData(mxa))); - } - else if(mxIsSparse(mxa)) //use a CRS matrix + } else if (mxIsSparse(mxa)) // use a CRS matrix { - //Default to Tpetra matrix for this - if(mxIsComplex(mxa)) - //complex matrix - return rcp_implicit_cast(rcp(new MuemexData>(mxa))); + // Default to Tpetra matrix for this + if (mxIsComplex(mxa)) + // complex matrix + return rcp_implicit_cast(rcp(new MuemexData >(mxa))); else - //real-valued matrix - return rcp_implicit_cast(rcp(new MuemexData>(mxa))); - } - else - { - //Default to Xpetra multivector for this case - if(mxIsComplex(mxa)) - return rcp_implicit_cast(rcp(new MuemexData>>(mxa))); + // real-valued matrix + return rcp_implicit_cast(rcp(new MuemexData >(mxa))); + } else { + // Default to Xpetra multivector for this case + if (mxIsComplex(mxa)) + return rcp_implicit_cast(rcp(new MuemexData > >(mxa))); else - return rcp_implicit_cast(rcp(new MuemexData>>(mxa))); + return rcp_implicit_cast(rcp(new MuemexData > >(mxa))); } break; - case mxSTRUCT_CLASS: - { - //the only thing that should get here currently is an Aggregates struct or Graph struct - //verify that it has the correct fields with the correct types - //also assume that aggregates data will not be stored in an array of more than 1 element. - if(isValidMatlabAggregates(mxa)) { - return rcp_implicit_cast(rcp(new MuemexData>(mxa))); - } else if(isValidMatlabGraph(mxa)) { - return rcp_implicit_cast(rcp(new MuemexData>(mxa))); + case mxSTRUCT_CLASS: { + // the only thing that should get here currently is an Aggregates struct or Graph struct + // verify that it has the correct fields with the correct types + // also assume that aggregates data will not be stored in an array of more than 1 element. 
+ if (isValidMatlabAggregates(mxa)) { + return rcp_implicit_cast(rcp(new MuemexData >(mxa))); + } else if (isValidMatlabGraph(mxa)) { + return rcp_implicit_cast(rcp(new MuemexData >(mxa))); } else { throw runtime_error("Invalid aggregates or graph struct passed in from MATLAB."); return Teuchos::null; @@ -435,19 +407,19 @@ template int loadDataFromMatlab(const mxArray* mxa); template double loadDataFromMatlab(const mxArray* mxa); template complex_t loadDataFromMatlab(const mxArray* mxa); template string loadDataFromMatlab(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); -template RCP loadDataFromMatlab>(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); +template RCP loadDataFromMatlab >(const mxArray* mxa); 
template mxArray* saveDataToMatlab(bool& data); template mxArray* saveDataToMatlab(int& data); @@ -468,10 +440,10 @@ template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); -template vector> processNeeds(const Factory* factory, string& needsParam, Level& lvl); -template vector> processNeeds(const Factory* factory, string& needsParam, Level& lvl); -template void processProvides(vector>& mexOutput, const Factory* factory, string& providesParam, Level& lvl); -template void processProvides(vector>& mexOutput, const Factory* factory, string& providesParam, Level& lvl); +template vector > processNeeds(const Factory* factory, string& needsParam, Level& lvl); +template vector > processNeeds(const Factory* factory, string& needsParam, Level& lvl); +template void processProvides(vector >& mexOutput, const Factory* factory, string& providesParam, Level& lvl); +template void processProvides(vector >& mexOutput, const Factory* factory, string& providesParam, Level& lvl); -}//end namespace -#endif // HAVE_MUELU_MATLAB +} // namespace MueLu +#endif // HAVE_MUELU_MATLAB diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp index f75a89a92d2c..391ccca1acf8 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp @@ -77,12 +77,9 @@ #include "Kokkos_DynRankView.hpp" +namespace MueLu { -namespace MueLu -{ - -enum MuemexType -{ +enum MuemexType { INT, BOOL, DOUBLE, @@ -104,12 +101,13 @@ enum MuemexType AMALGAMATION_INFO, GRAPH #ifdef HAVE_MUELU_INTREPID2 -, FIELDCONTAINER_ORDINAL + , + FIELDCONTAINER_ORDINAL #endif }; typedef Tpetra::KokkosCompat::KokkosDeviceWrapperNode mm_node_t; -typedef typename Tpetra::Map<>::local_ordinal_type mm_LocalOrd; //these are used for LocalOrdinal and GlobalOrdinal of all xpetra/tpetra templated types +typedef typename 
Tpetra::Map<>::local_ordinal_type mm_LocalOrd; // these are used for LocalOrdinal and GlobalOrdinal of all xpetra/tpetra templated types typedef typename Tpetra::Map<>::global_ordinal_type mm_GlobalOrd; typedef std::complex complex_t; typedef Tpetra::Map<> muemex_map_type; @@ -131,91 +129,93 @@ typedef MueLu::AmalgamationInfo MAmalInfo; typedef MueLu::GraphBase MGraph; #ifdef HAVE_MUELU_INTREPID2 - typedef Kokkos::DynRankView FieldContainer_ordinal; +typedef Kokkos::DynRankView FieldContainer_ordinal; #endif -class MuemexArg -{ - public: - MuemexArg(MuemexType dataType) {type = dataType;} - MuemexType type; +class MuemexArg { + public: + MuemexArg(MuemexType dataType) { type = dataType; } + MuemexType type; }; -template -MuemexType getMuemexType(const T & data); - -template -class MuemexData : public MuemexArg -{ - public: - MuemexData(T& data); //Construct from pre-existing data, to pass to MATLAB. - MuemexData(T& data, MuemexType type); //Construct from pre-existing data, to pass to MATLAB. - MuemexData(const mxArray* mxa); //Construct from MATLAB array, to get from MATLAB. - mxArray* convertToMatlab(); //Create a MATLAB object and copy this data to it - T& getData(); //Set and get methods - void setData(T& data); - private: - T data; +template +MuemexType getMuemexType(const T& data); + +template +class MuemexData : public MuemexArg { + public: + MuemexData(T& data); // Construct from pre-existing data, to pass to MATLAB. + MuemexData(T& data, MuemexType type); // Construct from pre-existing data, to pass to MATLAB. + MuemexData(const mxArray* mxa); // Construct from MATLAB array, to get from MATLAB. 
+ mxArray* convertToMatlab(); // Create a MATLAB object and copy this data to it + T& getData(); // Set and get methods + void setData(T& data); + + private: + T data; }; -template -MuemexType getMuemexType(const T & data); +template +MuemexType getMuemexType(const T& data); -template +template MuemexType getMuemexType(); -template +template T loadDataFromMatlab(const mxArray* mxa); -template +template mxArray* saveDataToMatlab(T& data); -//Add data to level. Set the keep flag on the data to "user-provided" so it's not deleted. -template -void addLevelVariable(const T& data, std::string& name, Level& lvl, const FactoryBase *fact = NoFactory::get()); +// Add data to level. Set the keep flag on the data to "user-provided" so it's not deleted. +template +void addLevelVariable(const T& data, std::string& name, Level& lvl, const FactoryBase* fact = NoFactory::get()); -template +template const T& getLevelVariable(std::string& name, Level& lvl); -//Functions used to put data through matlab factories - first arg is "this" pointer of matlab factory -template -std::vector> processNeeds(const Factory* factory, std::string& needsParam, Level& lvl); +// Functions used to put data through matlab factories - first arg is "this" pointer of matlab factory +template +std::vector > processNeeds(const Factory* factory, std::string& needsParam, Level& lvl); -template -void processProvides(std::vector>& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl); +template +void processProvides(std::vector >& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl); -//create a sparse array in Matlab -template mxArray* createMatlabSparse(int numRows, int numCols, int nnz); -template mxArray* createMatlabMultiVector(int numRows, int numCols); -template void fillMatlabArray(Scalar* array, const mxArray* mxa, int n); +// create a sparse array in Matlab +template +mxArray* createMatlabSparse(int numRows, int numCols, int nnz); +template +mxArray* 
createMatlabMultiVector(int numRows, int numCols); +template +void fillMatlabArray(Scalar* array, const mxArray* mxa, int n); int* mwIndex_to_int(int N, mwIndex* mwi_array); bool isValidMatlabAggregates(const mxArray* mxa); bool isValidMatlabGraph(const mxArray* mxa); std::vector tokenizeList(const std::string& param); -//The two callback functions that MueLu can call to run anything in MATLAB +// The two callback functions that MueLu can call to run anything in MATLAB void callMatlabNoArgs(std::string function); -std::vector> callMatlab(std::string function, int numOutputs, std::vector> args); +std::vector > callMatlab(std::string function, int numOutputs, std::vector > args); Teuchos::RCP getInputParamList(); Teuchos::RCP convertMatlabVar(const mxArray* mxa); // trim from start -static inline std::string <rim(std::string &s) { +static inline std::string& ltrim(std::string& s) { s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); return s; } // trim from end -static inline std::string &rtrim(std::string &s) { +static inline std::string& rtrim(std::string& s) { s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); return s; } // trim from both ends -static inline std::string &trim(std::string &s) { +static inline std::string& trim(std::string& s) { return ltrim(rtrim(s)); } -}//end namespace +} // namespace MueLu -#endif //HAVE_MUELU_MATLAB error handler -#endif //MUELU_MATLABUTILS_DECL_HPP guard +#endif // HAVE_MUELU_MATLAB error handler +#endif // MUELU_MATLABUTILS_DECL_HPP guard diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp index 88c542d77ed0..541650def661 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp @@ -65,301 +65,316 @@ extern bool rewrap_ints; /* getMuemexType */ /* ******************************* */ -template MuemexType 
getMuemexType(const T & data) {throw std::runtime_error("Unknown Type");} - -template<> MuemexType getMuemexType(const int & data) {return INT;} -template<> MuemexType getMuemexType() {return INT;} -template<> MuemexType getMuemexType() {return BOOL;} - -template<> MuemexType getMuemexType(const double & data) {return DOUBLE;} -template<> MuemexType getMuemexType() {return DOUBLE;} - -template<> MuemexType getMuemexType(const std::string & data) {return STRING;} -template<> MuemexType getMuemexType() {return STRING;} - -template<> MuemexType getMuemexType(const complex_t& data) {return COMPLEX;} -template<> MuemexType getMuemexType() {return COMPLEX;} - -template<> MuemexType getMuemexType(const RCP & data) {return XPETRA_MAP;} -template<> MuemexType getMuemexType >() {return XPETRA_MAP;} - -template<> MuemexType getMuemexType(const RCP & data) {return XPETRA_ORDINAL_VECTOR;} -template<> MuemexType getMuemexType>() {return XPETRA_ORDINAL_VECTOR;} - -template<> MuemexType getMuemexType(const RCP & data) {return TPETRA_MULTIVECTOR_DOUBLE;} -template<> MuemexType getMuemexType>() {return TPETRA_MULTIVECTOR_DOUBLE;} - -template<> MuemexType getMuemexType(const RCP& data) {return TPETRA_MULTIVECTOR_COMPLEX;} -template<> MuemexType getMuemexType>() {return TPETRA_MULTIVECTOR_COMPLEX;} - -template<> MuemexType getMuemexType(const RCP & data) {return TPETRA_MATRIX_DOUBLE;} -template<> MuemexType getMuemexType>() {return TPETRA_MATRIX_DOUBLE;} - -template<> MuemexType getMuemexType(const RCP & data) {return TPETRA_MATRIX_COMPLEX;} -template<> MuemexType getMuemexType>() {return TPETRA_MATRIX_COMPLEX;} - -template<> MuemexType getMuemexType(const RCP & data) {return XPETRA_MULTIVECTOR_DOUBLE;} -template<> MuemexType getMuemexType>() {return XPETRA_MULTIVECTOR_DOUBLE;} - -template<> MuemexType getMuemexType(const RCP & data) {return XPETRA_MULTIVECTOR_COMPLEX;} -template<> MuemexType getMuemexType>() {return XPETRA_MULTIVECTOR_COMPLEX;} - -template<> MuemexType 
getMuemexType(const RCP & data) {return XPETRA_MATRIX_DOUBLE;} -template<> MuemexType getMuemexType>() {return XPETRA_MATRIX_DOUBLE;} - -template<> MuemexType getMuemexType(const RCP & data) {return XPETRA_MATRIX_COMPLEX;} -template<> MuemexType getMuemexType>() {return XPETRA_MATRIX_COMPLEX;} - -template<> MuemexType getMuemexType(const RCP & data) {return EPETRA_CRSMATRIX;} -template<> MuemexType getMuemexType>() {return EPETRA_CRSMATRIX;} - -template<> MuemexType getMuemexType(const RCP & data) {return EPETRA_MULTIVECTOR;} -template<> MuemexType getMuemexType>() {return EPETRA_MULTIVECTOR;} - -template<> MuemexType getMuemexType(const RCP& data) {return AGGREGATES;} -template<> MuemexType getMuemexType>() {return AGGREGATES;} - -template<> MuemexType getMuemexType(const RCP& data) {return AMALGAMATION_INFO;} -template<> MuemexType getMuemexType>() {return AMALGAMATION_INFO;} - -template<> MuemexType getMuemexType(const RCP& data) {return GRAPH;} -template<> MuemexType getMuemexType>() {return GRAPH;} +template +MuemexType getMuemexType(const T& data) { throw std::runtime_error("Unknown Type"); } + +template <> +MuemexType getMuemexType(const int& data) { return INT; } +template <> +MuemexType getMuemexType() { return INT; } +template <> +MuemexType getMuemexType() { return BOOL; } + +template <> +MuemexType getMuemexType(const double& data) { return DOUBLE; } +template <> +MuemexType getMuemexType() { return DOUBLE; } + +template <> +MuemexType getMuemexType(const std::string& data) { return STRING; } +template <> +MuemexType getMuemexType() { return STRING; } + +template <> +MuemexType getMuemexType(const complex_t& data) { return COMPLEX; } +template <> +MuemexType getMuemexType() { return COMPLEX; } + +template <> +MuemexType getMuemexType(const RCP& data) { return XPETRA_MAP; } +template <> +MuemexType getMuemexType >() { return XPETRA_MAP; } + +template <> +MuemexType getMuemexType(const RCP& data) { return XPETRA_ORDINAL_VECTOR; } +template <> +MuemexType 
getMuemexType >() { return XPETRA_ORDINAL_VECTOR; } + +template <> +MuemexType getMuemexType(const RCP& data) { return TPETRA_MULTIVECTOR_DOUBLE; } +template <> +MuemexType getMuemexType >() { return TPETRA_MULTIVECTOR_DOUBLE; } + +template <> +MuemexType getMuemexType(const RCP& data) { return TPETRA_MULTIVECTOR_COMPLEX; } +template <> +MuemexType getMuemexType >() { return TPETRA_MULTIVECTOR_COMPLEX; } + +template <> +MuemexType getMuemexType(const RCP& data) { return TPETRA_MATRIX_DOUBLE; } +template <> +MuemexType getMuemexType >() { return TPETRA_MATRIX_DOUBLE; } + +template <> +MuemexType getMuemexType(const RCP& data) { return TPETRA_MATRIX_COMPLEX; } +template <> +MuemexType getMuemexType >() { return TPETRA_MATRIX_COMPLEX; } + +template <> +MuemexType getMuemexType(const RCP& data) { return XPETRA_MULTIVECTOR_DOUBLE; } +template <> +MuemexType getMuemexType >() { return XPETRA_MULTIVECTOR_DOUBLE; } + +template <> +MuemexType getMuemexType(const RCP& data) { return XPETRA_MULTIVECTOR_COMPLEX; } +template <> +MuemexType getMuemexType >() { return XPETRA_MULTIVECTOR_COMPLEX; } + +template <> +MuemexType getMuemexType(const RCP& data) { return XPETRA_MATRIX_DOUBLE; } +template <> +MuemexType getMuemexType >() { return XPETRA_MATRIX_DOUBLE; } + +template <> +MuemexType getMuemexType(const RCP& data) { return XPETRA_MATRIX_COMPLEX; } +template <> +MuemexType getMuemexType >() { return XPETRA_MATRIX_COMPLEX; } + +template <> +MuemexType getMuemexType(const RCP& data) { return EPETRA_CRSMATRIX; } +template <> +MuemexType getMuemexType >() { return EPETRA_CRSMATRIX; } + +template <> +MuemexType getMuemexType(const RCP& data) { return EPETRA_MULTIVECTOR; } +template <> +MuemexType getMuemexType >() { return EPETRA_MULTIVECTOR; } + +template <> +MuemexType getMuemexType(const RCP& data) { return AGGREGATES; } +template <> +MuemexType getMuemexType >() { return AGGREGATES; } + +template <> +MuemexType getMuemexType(const RCP& data) { return AMALGAMATION_INFO; } 
+template <> +MuemexType getMuemexType >() { return AMALGAMATION_INFO; } + +template <> +MuemexType getMuemexType(const RCP& data) { return GRAPH; } +template <> +MuemexType getMuemexType >() { return GRAPH; } #ifdef HAVE_MUELU_INTREPID2 -template<> MuemexType getMuemexType(const RCP& data) {return FIELDCONTAINER_ORDINAL;} -template<> MuemexType getMuemexType>() {return FIELDCONTAINER_ORDINAL;} +template <> +MuemexType getMuemexType(const RCP& data) { return FIELDCONTAINER_ORDINAL; } +template <> +MuemexType getMuemexType >() { return FIELDCONTAINER_ORDINAL; } #endif /* "prototypes" for specialized functions used in other specialized functions */ -template<> mxArray* createMatlabSparse(int numRows, int numCols, int nnz); -template<> mxArray* createMatlabSparse(int numRows, int numCols, int nnz); -template<> mxArray* createMatlabMultiVector(int numRows, int numCols); -template<> mxArray* createMatlabMultiVector(int numRows, int numCols); -template<> void fillMatlabArray(double* array, const mxArray* mxa, int n); -template<> void fillMatlabArray(complex_t* array, const mxArray* mxa, int n); -template<> mxArray* saveDataToMatlab(RCP& data); -template<> mxArray* saveDataToMatlab(RCP& data); -template<> mxArray* saveDataToMatlab(RCP& data); -template<> mxArray* saveDataToMatlab(RCP& data); +template <> +mxArray* createMatlabSparse(int numRows, int numCols, int nnz); +template <> +mxArray* createMatlabSparse(int numRows, int numCols, int nnz); +template <> +mxArray* createMatlabMultiVector(int numRows, int numCols); +template <> +mxArray* createMatlabMultiVector(int numRows, int numCols); +template <> +void fillMatlabArray(double* array, const mxArray* mxa, int n); +template <> +void fillMatlabArray(complex_t* array, const mxArray* mxa, int n); +template <> +mxArray* saveDataToMatlab(RCP& data); +template <> +mxArray* saveDataToMatlab(RCP& data); +template <> +mxArray* saveDataToMatlab(RCP& data); +template <> +mxArray* saveDataToMatlab(RCP& data); /* 
******************************* */ /* loadDataFromMatlab */ /* ******************************* */ -template<> -int loadDataFromMatlab(const mxArray* mxa) -{ +template <> +int loadDataFromMatlab(const mxArray* mxa) { mxClassID probIDtype = mxGetClassID(mxa); int rv; - if(probIDtype == mxINT32_CLASS) - { - rv = *((int*) mxGetData(mxa)); - } - else if(probIDtype == mxLOGICAL_CLASS) - { - rv = (int) *((bool*) mxGetData(mxa)); - } - else if(probIDtype == mxDOUBLE_CLASS) - { - rv = (int) *((double*) mxGetData(mxa)); - } - else if(probIDtype == mxUINT32_CLASS) - { - rv = (int) *((unsigned int*) mxGetData(mxa)); - } - else - { + if (probIDtype == mxINT32_CLASS) { + rv = *((int*)mxGetData(mxa)); + } else if (probIDtype == mxLOGICAL_CLASS) { + rv = (int)*((bool*)mxGetData(mxa)); + } else if (probIDtype == mxDOUBLE_CLASS) { + rv = (int)*((double*)mxGetData(mxa)); + } else if (probIDtype == mxUINT32_CLASS) { + rv = (int)*((unsigned int*)mxGetData(mxa)); + } else { rv = -1; throw std::runtime_error("Error: Unrecognized numerical type."); } return rv; } -template<> -bool loadDataFromMatlab(const mxArray* mxa) -{ - return *((bool*) mxGetData(mxa)); +template <> +bool loadDataFromMatlab(const mxArray* mxa) { + return *((bool*)mxGetData(mxa)); } -template<> -double loadDataFromMatlab(const mxArray* mxa) -{ - return *((double*) mxGetPr(mxa)); +template <> +double loadDataFromMatlab(const mxArray* mxa) { + return *((double*)mxGetPr(mxa)); } -template<> -complex_t loadDataFromMatlab(const mxArray* mxa) -{ - double realpart = real(*((double*) mxGetPr(mxa))); - double imagpart = imag(*((double*) mxGetPi(mxa))); +template <> +complex_t loadDataFromMatlab(const mxArray* mxa) { + double realpart = real(*((double*)mxGetPr(mxa))); + double imagpart = imag(*((double*)mxGetPi(mxa))); return complex_t(realpart, imagpart); } -template<> -string loadDataFromMatlab(const mxArray* mxa) -{ +template <> +string loadDataFromMatlab(const mxArray* mxa) { string rv = ""; - if (mxGetClassID(mxa) != 
mxCHAR_CLASS) - { + if (mxGetClassID(mxa) != mxCHAR_CLASS) { throw runtime_error("Can't construct string from anything but a char array."); } rv = string(mxArrayToString(mxa)); return rv; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { RCP > comm = rcp(new Teuchos::SerialComm()); - int nr = mxGetM(mxa); - int nc = mxGetN(mxa); - if(nr != 1) + int nr = mxGetM(mxa); + int nc = mxGetN(mxa); + if (nr != 1) throw std::runtime_error("A Xpetra::Map representation from MATLAB must be a single row vector."); - double* pr = mxGetPr(mxa); + double* pr = mxGetPr(mxa); mm_GlobalOrd numGlobalIndices = nc; std::vector localGIDs(numGlobalIndices); - for(int i = 0; i < int(numGlobalIndices); i++) { + for (int i = 0; i < int(numGlobalIndices); i++) { localGIDs[i] = Teuchos::as(pr[i]); } - const Teuchos::ArrayView localGIDs_view(&localGIDs[0],localGIDs.size()); + const Teuchos::ArrayView localGIDs_view(&localGIDs[0], localGIDs.size()); RCP map = Xpetra::MapFactory::Build( - Xpetra::UseTpetra, - Teuchos::OrdinalTraits::invalid(), - localGIDs_view, - 0, comm); + Xpetra::UseTpetra, + Teuchos::OrdinalTraits::invalid(), + localGIDs_view, + 0, comm); - if(map.is_null()) + if (map.is_null()) throw runtime_error("Failed to create Xpetra::Map."); return map; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { RCP > comm = rcp(new Teuchos::SerialComm()); - if(mxGetN(mxa) != 1 && mxGetM(mxa) != 1) + if (mxGetN(mxa) != 1 && mxGetM(mxa) != 1) throw std::runtime_error("An OrdinalVector from MATLAB must be a single row or column vector."); - mm_GlobalOrd numGlobalIndices = mxGetM(mxa) * mxGetN(mxa); - RCP> map = Xpetra::MapFactory::Build(Xpetra::UseTpetra, numGlobalIndices, 0, comm); - if(mxGetClassID(mxa) != mxINT32_CLASS) + mm_GlobalOrd numGlobalIndices = mxGetM(mxa) * mxGetN(mxa); + RCP > map = Xpetra::MapFactory::Build(Xpetra::UseTpetra, 
numGlobalIndices, 0, comm); + if (mxGetClassID(mxa) != mxINT32_CLASS) throw std::runtime_error("Can only construct LOVector with int32 data."); - int* array = (int*) mxGetData(mxa); - if(map.is_null()) + int* array = (int*)mxGetData(mxa); + if (map.is_null()) throw runtime_error("Failed to create map for Xpetra ordinal vector."); RCP loVec = Xpetra::VectorFactory::Build(map, false); - if(loVec.is_null()) + if (loVec.is_null()) throw runtime_error("Failed to create ordinal vector with Xpetra::VectorFactory."); - for(int i = 0; i < int(numGlobalIndices); i++) - { + for (int i = 0; i < int(numGlobalIndices); i++) { loVec->replaceGlobalValue(i, 0, array[i]); } return loVec; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ - RCP> mv; - try - { - int nr = mxGetM(mxa); - int nc = mxGetN(mxa); - double* pr = mxGetPr(mxa); - RCP> comm = Tpetra::getDefaultComm(); - //numGlobalIndices for map constructor is the number of rows in matrix/vectors, right? - RCP map = rcp(new muemex_map_type(nr, (mm_GlobalOrd) 0, comm)); - //Allocate a new array of complex values to use with the multivector +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { + RCP > mv; + try { + int nr = mxGetM(mxa); + int nc = mxGetN(mxa); + double* pr = mxGetPr(mxa); + RCP > comm = Tpetra::getDefaultComm(); + // numGlobalIndices for map constructor is the number of rows in matrix/vectors, right? 
+ RCP map = rcp(new muemex_map_type(nr, (mm_GlobalOrd)0, comm)); + // Allocate a new array of complex values to use with the multivector Teuchos::ArrayView arrView(pr, nr * nc); mv = rcp(new Tpetra::MultiVector(map, arrView, size_t(nr), size_t(nc))); - } - catch(std::exception& e) - { + } catch (std::exception& e) { mexPrintf("Error constructing Tpetra MultiVector.\n"); std::cout << e.what() << std::endl; } return mv; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ - RCP> mv; - try - { - int nr = mxGetM(mxa); - int nc = mxGetN(mxa); - double* pr = mxGetPr(mxa); - double* pi = mxGetPi(mxa); - RCP> comm = Tpetra::getDefaultComm(); - //numGlobalIndices for map constructor is the number of rows in matrix/vectors, right? - RCP map = rcp(new muemex_map_type(nr, (mm_GlobalOrd) 0, comm)); - //Allocate a new array of complex values to use with the multivector +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { + RCP > mv; + try { + int nr = mxGetM(mxa); + int nc = mxGetN(mxa); + double* pr = mxGetPr(mxa); + double* pi = mxGetPi(mxa); + RCP > comm = Tpetra::getDefaultComm(); + // numGlobalIndices for map constructor is the number of rows in matrix/vectors, right? 
+ RCP map = rcp(new muemex_map_type(nr, (mm_GlobalOrd)0, comm)); + // Allocate a new array of complex values to use with the multivector complex_t* myArr = new complex_t[nr * nc]; - for(int n = 0; n < nc; n++) - { - for(int m = 0; m < nr; m++) - { + for (int n = 0; n < nc; n++) { + for (int m = 0; m < nr; m++) { myArr[n * nr + m] = complex_t(pr[n * nr + m], pi[n * nr + m]); } } Teuchos::ArrayView arrView(myArr, nr * nc); mv = rcp(new Tpetra::MultiVector(map, arrView, nr, nc)); - } - catch(std::exception& e) - { + } catch (std::exception& e) { mexPrintf("Error constructing Tpetra MultiVector.\n"); std::cout << e.what() << std::endl; } return mv; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { bool success = false; RCP A; int* colptr = NULL; int* rowind = NULL; - try - { - RCP> comm = rcp(new Teuchos::SerialComm()); - //numGlobalIndices is just the number of rows in the matrix - const size_t numGlobalIndices = mxGetM(mxa); - RCP rowMap = rcp(new muemex_map_type(numGlobalIndices, 0, comm)); + try { + RCP > comm = rcp(new Teuchos::SerialComm()); + // numGlobalIndices is just the number of rows in the matrix + const size_t numGlobalIndices = mxGetM(mxa); + RCP rowMap = rcp(new muemex_map_type(numGlobalIndices, 0, comm)); RCP domainMap = rcp(new muemex_map_type(mxGetN(mxa), 0, comm)); - double* valueArray = mxGetPr(mxa); - int nc = mxGetN(mxa); - if(rewrap_ints) - { - //mwIndex_to_int allocates memory so must delete[] later + double* valueArray = mxGetPr(mxa); + int nc = mxGetN(mxa); + if (rewrap_ints) { + // mwIndex_to_int allocates memory so must delete[] later colptr = mwIndex_to_int(nc + 1, mxGetJc(mxa)); rowind = mwIndex_to_int(colptr[nc], mxGetIr(mxa)); + } else { + rowind = (int*)mxGetIr(mxa); + colptr = (int*)mxGetJc(mxa); } - else - { - rowind = (int*) mxGetIr(mxa); - colptr = (int*) mxGetJc(mxa); - } - //Need this to convert CSC colptrs to CRS row counts + // Need this to convert CSC 
colptrs to CRS row counts Teuchos::Array rowCounts(numGlobalIndices); - for(int i = 0; i < nc; i++) - { - for(int j = colptr[i]; j < colptr[i + 1]; j++) - { + for (int i = 0; i < nc; i++) { + for (int j = colptr[i]; j < colptr[i + 1]; j++) { rowCounts[rowind[j]]++; } } A = rcp(new Tpetra::CrsMatrix(rowMap, rowCounts())); - for(int i = 0; i < nc; i++) - { - for(int j = colptr[i]; j < colptr[i + 1]; j++) - { + for (int i = 0; i < nc; i++) { + for (int j = colptr[i]; j < colptr[i + 1]; j++) { //'array' of 1 element, containing column (in global matrix). Teuchos::ArrayView cols = Teuchos::ArrayView(&i, 1); //'array' of 1 element, containing value @@ -368,175 +383,146 @@ RCP loadDataFromMatlab>(co } } A->fillComplete(domainMap, rowMap); - if(rewrap_ints) - { - delete[] rowind; rowind = NULL; - delete[] colptr; colptr = NULL; + if (rewrap_ints) { + delete[] rowind; + rowind = NULL; + delete[] colptr; + colptr = NULL; } success = true; - } - catch(std::exception& e) - { - if(rewrap_ints) - { - if(rowind!=NULL) delete[] rowind; - if(colptr!=NULL) delete[] colptr; + } catch (std::exception& e) { + if (rewrap_ints) { + if (rowind != NULL) delete[] rowind; + if (colptr != NULL) delete[] colptr; rowind = NULL; colptr = NULL; } mexPrintf("Error while constructing Tpetra matrix:\n"); std::cout << e.what() << std::endl; } - if(!success) + if (!success) mexErrMsgTxt("An error occurred while setting up a Tpetra matrix.\n"); return A; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { RCP A; - //Create a map in order to create the matrix (taken from muelu basic example - complex) - try - { - RCP> comm = Tpetra::getDefaultComm(); + // Create a map in order to create the matrix (taken from muelu basic example - complex) + try { + RCP > comm = Tpetra::getDefaultComm(); const Tpetra::global_size_t numGlobalIndices = mxGetM(mxa); - const mm_GlobalOrd indexBase = 0; - RCP rowMap = rcp(new 
muemex_map_type(numGlobalIndices, indexBase, comm)); - RCP domainMap = rcp(new muemex_map_type(mxGetN(mxa), indexBase, comm)); - double* realArray = mxGetPr(mxa); - double* imagArray = mxGetPi(mxa); + const mm_GlobalOrd indexBase = 0; + RCP rowMap = rcp(new muemex_map_type(numGlobalIndices, indexBase, comm)); + RCP domainMap = rcp(new muemex_map_type(mxGetN(mxa), indexBase, comm)); + double* realArray = mxGetPr(mxa); + double* imagArray = mxGetPi(mxa); int* colptr; int* rowind; int nc = mxGetN(mxa); - if(rewrap_ints) - { - //mwIndex_to_int allocates memory so must delete[] later + if (rewrap_ints) { + // mwIndex_to_int allocates memory so must delete[] later colptr = mwIndex_to_int(nc + 1, mxGetJc(mxa)); rowind = mwIndex_to_int(colptr[nc], mxGetIr(mxa)); + } else { + rowind = (int*)mxGetIr(mxa); + colptr = (int*)mxGetJc(mxa); } - else - { - rowind = (int*) mxGetIr(mxa); - colptr = (int*) mxGetJc(mxa); - } - //Need this to convert CSC colptrs to CRS row counts + // Need this to convert CSC colptrs to CRS row counts Teuchos::Array rowCounts(numGlobalIndices); - for(int i = 0; i < nc; i++) - { - for(int j = colptr[i]; j < colptr[i + 1]; j++) - { + for (int i = 0; i < nc; i++) { + for (int j = colptr[i]; j < colptr[i + 1]; j++) { rowCounts[rowind[j]]++; } } A = rcp(new Tpetra::CrsMatrix(rowMap, rowCounts())); - for(int i = 0; i < nc; i++) - { - for(int j = colptr[i]; j < colptr[i + 1]; j++) - { - //here assuming that complex_t will always be defined as std::complex - //use 'value' over and over again with Teuchos::ArrayViews to insert into matrix - complex_t value = std::complex(realArray[j], imagArray[j]); + for (int i = 0; i < nc; i++) { + for (int j = colptr[i]; j < colptr[i + 1]; j++) { + // here assuming that complex_t will always be defined as std::complex + // use 'value' over and over again with Teuchos::ArrayViews to insert into matrix + complex_t value = std::complex(realArray[j], imagArray[j]); Teuchos::ArrayView cols = Teuchos::ArrayView(&i, 1); - 
Teuchos::ArrayView vals = Teuchos::ArrayView(&value, 1); + Teuchos::ArrayView vals = Teuchos::ArrayView(&value, 1); A->insertGlobalValues(rowind[j], cols, vals); } } A->fillComplete(domainMap, rowMap); - if(rewrap_ints) - { + if (rewrap_ints) { delete[] rowind; delete[] colptr; } - } - catch(std::exception& e) - { + } catch (std::exception& e) { mexPrintf("Error while constructing tpetra matrix:\n"); std::cout << e.what() << std::endl; } return A; } -template<> -RCP> loadDataFromMatlab>>(const mxArray* mxa) -{ - RCP> tmat = loadDataFromMatlab>>(mxa); +template <> +RCP > loadDataFromMatlab > >(const mxArray* mxa) { + RCP > tmat = loadDataFromMatlab > >(mxa); return MueLu::TpetraCrs_To_XpetraMatrix(tmat); } -template<> -RCP> loadDataFromMatlab>>(const mxArray* mxa) -{ - RCP> tmat = loadDataFromMatlab>>(mxa); +template <> +RCP > loadDataFromMatlab > >(const mxArray* mxa) { + RCP > tmat = loadDataFromMatlab > >(mxa); return MueLu::TpetraCrs_To_XpetraMatrix(tmat); } -template<> -RCP> loadDataFromMatlab>>(const mxArray* mxa) -{ - RCP> tpetraMV = loadDataFromMatlab>>(mxa); +template <> +RCP > loadDataFromMatlab > >(const mxArray* mxa) { + RCP > tpetraMV = loadDataFromMatlab > >(mxa); return MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetraMV); } -template<> -RCP> loadDataFromMatlab>>(const mxArray* mxa) -{ - RCP> tpetraMV = loadDataFromMatlab>>(mxa); +template <> +RCP > loadDataFromMatlab > >(const mxArray* mxa) { + RCP > tpetraMV = loadDataFromMatlab > >(mxa); return MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetraMV); } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { RCP matrix; - try - { + try { int* colptr; int* rowind; double* vals = mxGetPr(mxa); - int nr = mxGetM(mxa); - int nc = mxGetN(mxa); - if(rewrap_ints) - { + int nr = mxGetM(mxa); + int nc = mxGetN(mxa); + if (rewrap_ints) { colptr = mwIndex_to_int(nc + 1, mxGetJc(mxa)); rowind = mwIndex_to_int(colptr[nc], mxGetIr(mxa)); - } - 
else - { - rowind = (int*) mxGetIr(mxa); - colptr = (int*) mxGetJc(mxa); + } else { + rowind = (int*)mxGetIr(mxa); + colptr = (int*)mxGetJc(mxa); } Epetra_SerialComm Comm; Epetra_Map RangeMap(nr, 0, Comm); Epetra_Map DomainMap(nc, 0, Comm); matrix = rcp(new Epetra_CrsMatrix(Epetra_DataAccess::Copy, RangeMap, DomainMap, 0)); /* Do the matrix assembly */ - for(int i = 0; i < nc; i++) - { - for(int j = colptr[i]; j < colptr[i + 1]; j++) - { - //global row, # of entries, value array, column indices array + for (int i = 0; i < nc; i++) { + for (int j = colptr[i]; j < colptr[i + 1]; j++) { + // global row, # of entries, value array, column indices array matrix->InsertGlobalValues(rowind[j], 1, &vals[j], &i); } } matrix->FillComplete(DomainMap, RangeMap); - if(rewrap_ints) - { - delete [] rowind; - delete [] colptr; + if (rewrap_ints) { + delete[] rowind; + delete[] colptr; } - } - catch(std::exception& e) - { + } catch (std::exception& e) { mexPrintf("An error occurred while setting up an Epetra matrix:\n"); std::cout << e.what() << std::endl; } return matrix; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { int nr = mxGetM(mxa); int nc = mxGetN(mxa); Epetra_SerialComm Comm; @@ -544,133 +530,127 @@ RCP loadDataFromMatlab>(const mxArra return rcp(new Epetra_MultiVector(Epetra_DataAccess::Copy, map, mxGetPr(mxa), nr, nc)); } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ - if(mxGetNumberOfElements(mxa) != 1) +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { + if (mxGetNumberOfElements(mxa) != 1) throw runtime_error("Aggregates must be individual structs in MATLAB."); - if(!mxIsStruct(mxa)) + if (!mxIsStruct(mxa)) throw runtime_error("Trying to pull aggregates from non-struct MATLAB object."); - //assume that in matlab aggregate structs will only be stored in a 1x1 array - //mxa must have the same fields as the ones declared in constructAggregates function in muelu.m for this 
to work - const int correctNumFields = 5; //change if more fields are added to the aggregates representation in constructAggregates in muelu.m - if(mxGetNumberOfFields(mxa) != correctNumFields) + // assume that in matlab aggregate structs will only be stored in a 1x1 array + // mxa must have the same fields as the ones declared in constructAggregates function in muelu.m for this to work + const int correctNumFields = 5; // change if more fields are added to the aggregates representation in constructAggregates in muelu.m + if (mxGetNumberOfFields(mxa) != correctNumFields) throw runtime_error("Aggregates structure has wrong number of fields."); - //Pull MuemexData types back out - int nVert = *(int*) mxGetData(mxGetField(mxa, 0, "nVertices")); - int nAgg = *(int*) mxGetData(mxGetField(mxa, 0, "nAggregates")); - //Now have all the data needed to fully reconstruct the aggregate - //Use similar approach as UserAggregationFactory (which is written for >1 thread but will just be serial here) - RCP> comm = Teuchos::DefaultComm::getComm(); - int myRank = comm->getRank(); - Xpetra::UnderlyingLib lib = Xpetra::UseTpetra; - RCP> map = Xpetra::MapFactory::Build(lib, nVert, 0, comm); - RCP agg = rcp(new MAggregates(map)); + // Pull MuemexData types back out + int nVert = *(int*)mxGetData(mxGetField(mxa, 0, "nVertices")); + int nAgg = *(int*)mxGetData(mxGetField(mxa, 0, "nAggregates")); + // Now have all the data needed to fully reconstruct the aggregate + // Use similar approach as UserAggregationFactory (which is written for >1 thread but will just be serial here) + RCP > comm = Teuchos::DefaultComm::getComm(); + int myRank = comm->getRank(); + Xpetra::UnderlyingLib lib = Xpetra::UseTpetra; + RCP > map = Xpetra::MapFactory::Build(lib, nVert, 0, comm); + RCP agg = rcp(new MAggregates(map)); agg->SetNumAggregates(nAgg); - //Get handles for the vertex2AggId and procwinner arrays in reconstituted aggregates object - //this is serial so all procwinner values will be same (0) - 
ArrayRCP vertex2AggId = agg->GetVertex2AggId()->getDataNonConst(0); //the '0' means first (and only) column of multivector, since is just vector - ArrayRCP procWinner = agg->GetProcWinner()->getDataNonConst(0); - //mm_LocalOrd and int are equivalent, so is ok to talk about aggSize with just 'int' - //Deep copy the entire vertex2AggID and isRoot arrays, which are both nVert items long - //At the same time, set ProcWinner - mxArray* vertToAggID_in = mxGetField(mxa, 0, "vertexToAggID"); - int* vertToAggID_inArray = (int*) mxGetData(vertToAggID_in); - mxArray* rootNodes_in = mxGetField(mxa, 0, "rootNodes"); - int* rootNodes_inArray = (int*) mxGetData(rootNodes_in); - for(int i = 0; i < nVert; i++) - { + // Get handles for the vertex2AggId and procwinner arrays in reconstituted aggregates object + // this is serial so all procwinner values will be same (0) + ArrayRCP vertex2AggId = agg->GetVertex2AggId()->getDataNonConst(0); // the '0' means first (and only) column of multivector, since is just vector + ArrayRCP procWinner = agg->GetProcWinner()->getDataNonConst(0); + // mm_LocalOrd and int are equivalent, so is ok to talk about aggSize with just 'int' + // Deep copy the entire vertex2AggID and isRoot arrays, which are both nVert items long + // At the same time, set ProcWinner + mxArray* vertToAggID_in = mxGetField(mxa, 0, "vertexToAggID"); + int* vertToAggID_inArray = (int*)mxGetData(vertToAggID_in); + mxArray* rootNodes_in = mxGetField(mxa, 0, "rootNodes"); + int* rootNodes_inArray = (int*)mxGetData(rootNodes_in); + for (int i = 0; i < nVert; i++) { vertex2AggId[i] = vertToAggID_inArray[i]; - procWinner[i] = myRank; //all nodes are going to be on the same proc - agg->SetIsRoot(i, false); //the ones that are root will be set in next loop + procWinner[i] = myRank; // all nodes are going to be on the same proc + agg->SetIsRoot(i, false); // the ones that are root will be set in next loop } - for(int i = 0; i < nAgg; i++) //rootNodesToCopy is an array of node IDs which 
are the roots of their aggs + for (int i = 0; i < nAgg; i++) // rootNodesToCopy is an array of node IDs which are the roots of their aggs { agg->SetIsRoot(rootNodes_inArray[i], true); } - //Now recompute the aggSize array the results in the object + // Now recompute the aggSize array the results in the object agg->ComputeAggregateSizes(true); agg->AggregatesCrossProcessors(false); return agg; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { RCP amal; throw runtime_error("AmalgamationInfo not supported in Muemex yet."); return amal; } -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ - //mxa must be struct with logical sparse matrix called 'edges' and Nx1 int32 array 'boundaryNodes' - mxArray* edges = mxGetField(mxa, 0, "edges"); +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { + // mxa must be struct with logical sparse matrix called 'edges' and Nx1 int32 array 'boundaryNodes' + mxArray* edges = mxGetField(mxa, 0, "edges"); mxArray* boundaryNodes = mxGetField(mxa, 0, "boundaryNodes"); - if(edges == NULL) + if (edges == NULL) throw runtime_error("Graph structure in MATLAB must have a field called 'edges' (logical sparse matrix)"); - if(boundaryNodes == NULL) + if (boundaryNodes == NULL) throw runtime_error("Graph structure in MATLAB must have a field called 'boundaryNodes' (int32 array containing list of boundary nodes)"); - int* boundaryList = (int*) mxGetData(boundaryNodes); - if(!mxIsSparse(edges) || mxGetClassID(edges) != mxLOGICAL_CLASS) + int* boundaryList = (int*)mxGetData(boundaryNodes); + if (!mxIsSparse(edges) || mxGetClassID(edges) != mxLOGICAL_CLASS) throw runtime_error("Graph edges must be stored as a logical sparse matrix."); // Note that Matlab stores sparse matrices in column major format. 
- mwIndex* matlabColPtrs = mxGetJc(edges); + mwIndex* matlabColPtrs = mxGetJc(edges); mwIndex* matlabRowIndices = mxGetIr(edges); - mm_GlobalOrd nRows = (mm_GlobalOrd) mxGetM(edges); + mm_GlobalOrd nRows = (mm_GlobalOrd)mxGetM(edges); // Create and populate row-major CRS data structures for Xpetra::TpetraCrsGraph. // calculate number of nonzeros in each row Teuchos::Array entriesPerRow(nRows); - int nnz = matlabColPtrs[mxGetN(edges)]; //last entry in matlabColPtrs - for(int i = 0; i < nnz; i++) + int nnz = matlabColPtrs[mxGetN(edges)]; // last entry in matlabColPtrs + for (int i = 0; i < nnz; i++) entriesPerRow[matlabRowIndices[i]]++; // Populate usual row index array. We don't need this for the Xpetra Graph ctor, but // it's convenient for building up the column index array, which the ctor does need. - Teuchos::Array rows(nRows+1); + Teuchos::Array rows(nRows + 1); rows[0] = 0; - for(int i = 0; i < nRows; i++) - rows[i+1] = rows[i] + entriesPerRow[i]; - Teuchos::Array cols(nnz); //column index array - Teuchos::Array insertionsPerRow(nRows,0); //track of #insertions done per row + for (int i = 0; i < nRows; i++) + rows[i + 1] = rows[i] + entriesPerRow[i]; + Teuchos::Array cols(nnz); // column index array + Teuchos::Array insertionsPerRow(nRows, 0); // track of #insertions done per row int ncols = mxGetN(edges); - for (int colNum=0; colNum(matlabColPtrs[colNum+1]); ++j) { - int rowNum = matlabRowIndices[j]; - cols[ rows[rowNum] + insertionsPerRow[rowNum] ] = colNum; + for (int j = ci; j < Teuchos::as(matlabColPtrs[colNum + 1]); ++j) { + int rowNum = matlabRowIndices[j]; + cols[rows[rowNum] + insertionsPerRow[rowNum]] = colNum; insertionsPerRow[rowNum]++; } } - //Find maximum + // Find maximum int maxNzPerRow = 0; - for(int i = 0; i < nRows; i++) { - if(maxNzPerRow < entriesPerRow[i]) + for (int i = 0; i < nRows; i++) { + if (maxNzPerRow < entriesPerRow[i]) maxNzPerRow = entriesPerRow[i]; } - RCP> comm = rcp(new Teuchos::SerialComm()); + RCP > comm = rcp(new 
Teuchos::SerialComm()); typedef Xpetra::TpetraMap MMap; RCP map = rcp(new MMap(nRows, 0, comm)); typedef Xpetra::TpetraCrsGraph TpetraGraph; - RCP tgraph = rcp(new TpetraGraph(map, (size_t) maxNzPerRow)); - //Populate tgraph in compressed-row format. Must get each row individually... - for(int i = 0; i < nRows; ++i) { - tgraph->insertGlobalIndices((mm_GlobalOrd) i, cols(rows[i],entriesPerRow[i])); + RCP tgraph = rcp(new TpetraGraph(map, (size_t)maxNzPerRow)); + // Populate tgraph in compressed-row format. Must get each row individually... + for (int i = 0; i < nRows; ++i) { + tgraph->insertGlobalIndices((mm_GlobalOrd)i, cols(rows[i], entriesPerRow[i])); } tgraph->fillComplete(map, map); RCP mgraph = rcp(new MueLu::Graph(tgraph)); - //Set boundary nodes + // Set boundary nodes int numBoundaryNodes = mxGetNumberOfElements(boundaryNodes); - bool* boundaryFlags = new bool[nRows]; - for(int i = 0; i < nRows; i++) - { + bool* boundaryFlags = new bool[nRows]; + for (int i = 0; i < nRows; i++) { boundaryFlags[i] = false; } - for(int i = 0; i < numBoundaryNodes; i++) - { + for (int i = 0; i < numBoundaryNodes; i++) { boundaryFlags[boundaryList[i]] = true; } ArrayRCP boundaryNodesInput(boundaryFlags, 0, nRows, true); @@ -678,24 +658,20 @@ RCP loadDataFromMatlab>(const mxArray* mxa) return mgraph; } - #ifdef HAVE_MUELU_INTREPID2 -template<> -RCP loadDataFromMatlab>(const mxArray* mxa) -{ - if(mxGetClassID(mxa) != mxINT32_CLASS) +template <> +RCP loadDataFromMatlab >(const mxArray* mxa) { + if (mxGetClassID(mxa) != mxINT32_CLASS) throw runtime_error("FieldContainer must have integer storage entries"); - int *data = (int *) mxGetData(mxa); - int nr = mxGetM(mxa); - int nc = mxGetN(mxa); + int* data = (int*)mxGetData(mxa); + int nr = mxGetM(mxa); + int nc = mxGetN(mxa); - RCP fc = rcp(new FieldContainer_ordinal("FC from Matlab",nr,nc)); - for(int col = 0; col < nc; col++) - { - for(int row = 0; row < nr; row++) - { - (*fc)(row,col) = data[col * nr + row]; + RCP fc = rcp(new 
FieldContainer_ordinal("FC from Matlab", nr, nc)); + for (int col = 0; col < nc; col++) { + for (int row = 0; row < nr; row++) { + (*fc)(row, col) = data[col * nr + row]; } } return fc; @@ -706,336 +682,301 @@ RCP loadDataFromMatlab>(cons /* saveDataToMatlab */ /* ******************************* */ -template<> -mxArray* saveDataToMatlab(int& data) -{ - mwSize dims[] = {1, 1}; - mxArray* mxa = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); - *((int*) mxGetData(mxa)) = data; +template <> +mxArray* saveDataToMatlab(int& data) { + mwSize dims[] = {1, 1}; + mxArray* mxa = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); + *((int*)mxGetData(mxa)) = data; return mxa; } -template<> -mxArray* saveDataToMatlab(bool& data) -{ - mwSize dims[] = {1, 1}; - mxArray* mxa = mxCreateLogicalArray(2, dims); - *((bool*) mxGetData(mxa)) = data; +template <> +mxArray* saveDataToMatlab(bool& data) { + mwSize dims[] = {1, 1}; + mxArray* mxa = mxCreateLogicalArray(2, dims); + *((bool*)mxGetData(mxa)) = data; return mxa; } -template<> -mxArray* saveDataToMatlab(double& data) -{ +template <> +mxArray* saveDataToMatlab(double& data) { return mxCreateDoubleScalar(data); } -template<> -mxArray* saveDataToMatlab(complex_t& data) -{ - mwSize dims[] = {1, 1}; - mxArray* mxa = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxCOMPLEX); - *((double*) mxGetPr(mxa)) = real(data); - *((double*) mxGetPi(mxa)) = imag(data); +template <> +mxArray* saveDataToMatlab(complex_t& data) { + mwSize dims[] = {1, 1}; + mxArray* mxa = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxCOMPLEX); + *((double*)mxGetPr(mxa)) = real(data); + *((double*)mxGetPi(mxa)) = imag(data); return mxa; } -template<> -mxArray* saveDataToMatlab(string& data) -{ +template <> +mxArray* saveDataToMatlab(string& data) { return mxCreateString(data.c_str()); } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ - //Precondition: Memory has already been allocated by MATLAB for the array. 
- int nc = data->getGlobalNumElements(); - int nr = 1; +template <> +mxArray* saveDataToMatlab(RCP& data) { + // Precondition: Memory has already been allocated by MATLAB for the array. + int nc = data->getGlobalNumElements(); + int nr = 1; mxArray* output = createMatlabMultiVector(nr, nc); - double* array = (double*) malloc(sizeof(double) * nr * nc); - for(int col = 0; col < nc; col++) - { + double* array = (double*)malloc(sizeof(double) * nr * nc); + for (int col = 0; col < nc; col++) { mm_GlobalOrd gid = data->getGlobalElement(col); - array[col] = Teuchos::as(gid); + array[col] = Teuchos::as(gid); } fillMatlabArray(array, output, nc * nr); free(array); return output; } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { mwSize len = data->getGlobalLength(); - //create a single column vector - mwSize dimensions[] = {len, 1}; - mxArray* rv = mxCreateNumericArray(2, dimensions, mxINT32_CLASS, mxREAL); - int* dataPtr = (int*) mxGetData(rv); + // create a single column vector + mwSize dimensions[] = {len, 1}; + mxArray* rv = mxCreateNumericArray(2, dimensions, mxINT32_CLASS, mxREAL); + int* dataPtr = (int*)mxGetData(rv); ArrayRCP arr = data->getData(0); - for(int i = 0; i < int(data->getGlobalLength()); i++) - { + for (int i = 0; i < int(data->getGlobalLength()); i++) { dataPtr[i] = arr[i]; } return rv; } -template<> -mxArray* saveDataToMatlab(RCP>& data) -{ +template <> +mxArray* saveDataToMatlab(RCP >& data) { RCP xmv = MueLu::TpetraMultiVector_To_XpetraMultiVector(data); return saveDataToMatlab(xmv); } -template<> -mxArray* saveDataToMatlab(RCP>& data) -{ +template <> +mxArray* saveDataToMatlab(RCP >& data) { RCP xmv = MueLu::TpetraMultiVector_To_XpetraMultiVector(data); return saveDataToMatlab(xmv); } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ - RCP> xmat = TpetraCrs_To_XpetraMatrix(data); +template <> +mxArray* saveDataToMatlab(RCP& data) { + RCP > xmat = TpetraCrs_To_XpetraMatrix(data); return 
saveDataToMatlab(xmat); } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ - RCP> xmat = TpetraCrs_To_XpetraMatrix(data); +template <> +mxArray* saveDataToMatlab(RCP& data) { + RCP > xmat = TpetraCrs_To_XpetraMatrix(data); return saveDataToMatlab(xmat); } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { typedef double Scalar; // Compute global constants, if we need them Teuchos::rcp_const_cast(data->getCrsGraph())->computeGlobalConstants(); - int nr = data->getGlobalNumRows(); - int nc = data->getGlobalNumCols(); - int nnz = data->getGlobalNumEntries(); + int nr = data->getGlobalNumRows(); + int nc = data->getGlobalNumCols(); + int nnz = data->getGlobalNumEntries(); #ifdef VERBOSE_OUTPUT RCP fancyStream = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); mat->describe(*fancyStream, Teuchos::VERB_EXTREME); #endif mxArray* mxa = createMatlabSparse(nr, nc, nnz); - mwIndex* ir = mxGetIr(mxa); - mwIndex* jc = mxGetJc(mxa); - for(int i = 0; i < nc + 1; i++) - { + mwIndex* ir = mxGetIr(mxa); + mwIndex* jc = mxGetJc(mxa); + for (int i = 0; i < nc + 1; i++) { jc[i] = 0; } size_t maxEntriesPerRow = data->getGlobalMaxNumRowEntries(); - if(maxEntriesPerRow == Teuchos::OrdinalTraits::invalid() || maxEntriesPerRow == 0) maxEntriesPerRow = data->getLocalMaxNumRowEntries(); + if (maxEntriesPerRow == Teuchos::OrdinalTraits::invalid() || maxEntriesPerRow == 0) maxEntriesPerRow = data->getLocalMaxNumRowEntries(); int* rowProgress = new int[nc]; - //The array that will be copied to Pr and (if complex) Pi later + // The array that will be copied to Pr and (if complex) Pi later Scalar* sparseVals = new Scalar[nnz]; size_t numEntries; - if(data->isLocallyIndexed()) - { + if (data->isLocallyIndexed()) { Scalar* rowValArray = new Scalar[maxEntriesPerRow]; Teuchos::ArrayView rowVals(rowValArray, maxEntriesPerRow); mm_LocalOrd* rowIndicesArray = new mm_LocalOrd[maxEntriesPerRow]; Teuchos::ArrayView rowIndices(rowIndicesArray, 
maxEntriesPerRow); - for(mm_LocalOrd m = 0; m < nr; m++) //All rows in the Xpetra matrix + for (mm_LocalOrd m = 0; m < nr; m++) // All rows in the Xpetra matrix { - data->getLocalRowCopy(m, rowIndices, rowVals, numEntries); //Get the row - for(mm_LocalOrd entry = 0; entry < int(numEntries); entry++) //All entries in row + data->getLocalRowCopy(m, rowIndices, rowVals, numEntries); // Get the row + for (mm_LocalOrd entry = 0; entry < int(numEntries); entry++) // All entries in row { - jc[rowIndices[entry] + 1]++; //for each entry, increase jc for the entry's column + jc[rowIndices[entry] + 1]++; // for each entry, increase jc for the entry's column } } - //now jc holds the number of elements in each column, but needs cumulative sum over all previous columns also + // now jc holds the number of elements in each column, but needs cumulative sum over all previous columns also int entriesAccum = 0; - for(int n = 0; n <= nc; n++) - { + for (int n = 0; n <= nc; n++) { int temp = entriesAccum; entriesAccum += jc[n]; jc[n] += temp; } - //Jc now populated with colptrs - for(int i = 0; i < nc; i++) - { + // Jc now populated with colptrs + for (int i = 0; i < nc; i++) { rowProgress[i] = 0; } - //Row progress values like jc but keep track as the MATLAB matrix is being filled in - for(mm_LocalOrd m = 0; m < nr; m++) //rows + // Row progress values like jc but keep track as the MATLAB matrix is being filled in + for (mm_LocalOrd m = 0; m < nr; m++) // rows { data->getLocalRowCopy(m, rowIndices, rowVals, numEntries); - for(mm_LocalOrd i = 0; i < int(numEntries); i++) //entries in row m (NOT columns) + for (mm_LocalOrd i = 0; i < int(numEntries); i++) // entries in row m (NOT columns) { - //row is m, col is rowIndices[i], val is rowVals[i] - mm_LocalOrd col = rowIndices[i]; - sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; //Set value - ir[jc[col] + rowProgress[col]] = m; //Set row at which value occurs + // row is m, col is rowIndices[i], val is rowVals[i] + mm_LocalOrd col = 
rowIndices[i]; + sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; // Set value + ir[jc[col] + rowProgress[col]] = m; // Set row at which value occurs rowProgress[col]++; } } delete[] rowIndicesArray; - } - else - { + } else { Teuchos::ArrayView rowIndices; Teuchos::ArrayView rowVals; - for(mm_GlobalOrd m = 0; m < nr; m++) - { + for (mm_GlobalOrd m = 0; m < nr; m++) { data->getGlobalRowView(m, rowIndices, rowVals); - for(mm_GlobalOrd n = 0; n < rowIndices.size(); n++) - { + for (mm_GlobalOrd n = 0; n < rowIndices.size(); n++) { jc[rowIndices[n] + 1]++; } } - //Last element of jc is just nnz + // Last element of jc is just nnz jc[nc] = nnz; - //Jc now populated with colptrs - for(int i = 0; i < nc; i++) - { + // Jc now populated with colptrs + for (int i = 0; i < nc; i++) { rowProgress[i] = 0; } int entriesAccum = 0; - for(int n = 0; n <= nc; n++) - { + for (int n = 0; n <= nc; n++) { int temp = entriesAccum; entriesAccum += jc[n]; jc[n] += temp; } - //Row progress values like jc but keep track as the MATLAB matrix is being filled in - for(mm_GlobalOrd m = 0; m < nr; m++) //rows + // Row progress values like jc but keep track as the MATLAB matrix is being filled in + for (mm_GlobalOrd m = 0; m < nr; m++) // rows { data->getGlobalRowView(m, rowIndices, rowVals); - for(mm_LocalOrd i = 0; i < rowIndices.size(); i++) //entries in row m + for (mm_LocalOrd i = 0; i < rowIndices.size(); i++) // entries in row m { - mm_GlobalOrd col = rowIndices[i]; //row is m, col is rowIndices[i], val is rowVals[i] - sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; //Set value - ir[jc[col] + rowProgress[col]] = m; //Set row at which value occurs + mm_GlobalOrd col = rowIndices[i]; // row is m, col is rowIndices[i], val is rowVals[i] + sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; // Set value + ir[jc[col] + rowProgress[col]] = m; // Set row at which value occurs rowProgress[col]++; } } } - //finally, copy sparseVals into pr (and pi, if complex) + // finally, copy sparseVals 
into pr (and pi, if complex) fillMatlabArray(sparseVals, mxa, nnz); delete[] sparseVals; delete[] rowProgress; return mxa; } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { typedef complex_t Scalar; // Compute global constants, if we need them Teuchos::rcp_const_cast(data->getCrsGraph())->computeGlobalConstants(); - int nr = data->getGlobalNumRows(); - int nc = data->getGlobalNumCols(); - int nnz = data->getGlobalNumEntries(); + int nr = data->getGlobalNumRows(); + int nc = data->getGlobalNumCols(); + int nnz = data->getGlobalNumEntries(); #ifdef VERBOSE_OUTPUT RCP fancyStream = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); mat->describe(*fancyStream, Teuchos::VERB_EXTREME); #endif mxArray* mxa = createMatlabSparse(nr, nc, nnz); - mwIndex* ir = mxGetIr(mxa); - mwIndex* jc = mxGetJc(mxa); - for(int i = 0; i < nc + 1; i++) - { + mwIndex* ir = mxGetIr(mxa); + mwIndex* jc = mxGetJc(mxa); + for (int i = 0; i < nc + 1; i++) { jc[i] = 0; } size_t maxEntriesPerRow = data->getGlobalMaxNumRowEntries(); - int* rowProgress = new int[nc]; - //The array that will be copied to Pr and (if complex) Pi later + int* rowProgress = new int[nc]; + // The array that will be copied to Pr and (if complex) Pi later Scalar* sparseVals = new Scalar[nnz]; size_t numEntries; - if(data->isLocallyIndexed()) - { + if (data->isLocallyIndexed()) { Scalar* rowValArray = new Scalar[maxEntriesPerRow]; Teuchos::ArrayView rowVals(rowValArray, maxEntriesPerRow); mm_LocalOrd* rowIndicesArray = new mm_LocalOrd[maxEntriesPerRow]; Teuchos::ArrayView rowIndices(rowIndicesArray, maxEntriesPerRow); - for(mm_LocalOrd m = 0; m < nr; m++) //All rows in the Xpetra matrix + for (mm_LocalOrd m = 0; m < nr; m++) // All rows in the Xpetra matrix { - data->getLocalRowCopy(m, rowIndices, rowVals, numEntries); //Get the row - for(mm_LocalOrd entry = 0; entry < int(numEntries); entry++) //All entries in row + data->getLocalRowCopy(m, rowIndices, rowVals, 
numEntries); // Get the row + for (mm_LocalOrd entry = 0; entry < int(numEntries); entry++) // All entries in row { - jc[rowIndices[entry] + 1]++; //for each entry, increase jc for the entry's column + jc[rowIndices[entry] + 1]++; // for each entry, increase jc for the entry's column } } - //now jc holds the number of elements in each column, but needs cumulative sum over all previous columns also + // now jc holds the number of elements in each column, but needs cumulative sum over all previous columns also int entriesAccum = 0; - for(int n = 0; n <= nc; n++) - { + for (int n = 0; n <= nc; n++) { int temp = entriesAccum; entriesAccum += jc[n]; jc[n] += temp; } - //Jc now populated with colptrs - for(int i = 0; i < nc; i++) - { + // Jc now populated with colptrs + for (int i = 0; i < nc; i++) { rowProgress[i] = 0; } - //Row progress values like jc but keep track as the MATLAB matrix is being filled in - for(mm_LocalOrd m = 0; m < nr; m++) //rows + // Row progress values like jc but keep track as the MATLAB matrix is being filled in + for (mm_LocalOrd m = 0; m < nr; m++) // rows { data->getLocalRowCopy(m, rowIndices, rowVals, numEntries); - for(mm_LocalOrd i = 0; i < int(numEntries); i++) //entries in row m (NOT columns) + for (mm_LocalOrd i = 0; i < int(numEntries); i++) // entries in row m (NOT columns) { - //row is m, col is rowIndices[i], val is rowVals[i] - mm_LocalOrd col = rowIndices[i]; - sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; //Set value - ir[jc[col] + rowProgress[col]] = m; //Set row at which value occurs + // row is m, col is rowIndices[i], val is rowVals[i] + mm_LocalOrd col = rowIndices[i]; + sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; // Set value + ir[jc[col] + rowProgress[col]] = m; // Set row at which value occurs rowProgress[col]++; } } delete[] rowIndicesArray; - } - else - { + } else { Teuchos::ArrayView rowIndices; Teuchos::ArrayView rowVals; - for(mm_GlobalOrd m = 0; m < nr; m++) - { + for (mm_GlobalOrd m = 0; m < nr; 
m++) { data->getGlobalRowView(m, rowIndices, rowVals); - for(mm_GlobalOrd n = 0; n < rowIndices.size(); n++) - { + for (mm_GlobalOrd n = 0; n < rowIndices.size(); n++) { jc[rowIndices[n] + 1]++; } } - //Last element of jc is just nnz + // Last element of jc is just nnz jc[nc] = nnz; - //Jc now populated with colptrs - for(int i = 0; i < nc; i++) - { + // Jc now populated with colptrs + for (int i = 0; i < nc; i++) { rowProgress[i] = 0; } int entriesAccum = 0; - for(int n = 0; n <= nc; n++) - { + for (int n = 0; n <= nc; n++) { int temp = entriesAccum; entriesAccum += jc[n]; jc[n] += temp; } - //Row progress values like jc but keep track as the MATLAB matrix is being filled in - for(mm_GlobalOrd m = 0; m < nr; m++) //rows + // Row progress values like jc but keep track as the MATLAB matrix is being filled in + for (mm_GlobalOrd m = 0; m < nr; m++) // rows { data->getGlobalRowView(m, rowIndices, rowVals); - for(mm_LocalOrd i = 0; i < rowIndices.size(); i++) //entries in row m + for (mm_LocalOrd i = 0; i < rowIndices.size(); i++) // entries in row m { - mm_GlobalOrd col = rowIndices[i]; //row is m, col is rowIndices[i], val is rowVals[i] - sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; //Set value - ir[jc[col] + rowProgress[col]] = m; //Set row at which value occurs + mm_GlobalOrd col = rowIndices[i]; // row is m, col is rowIndices[i], val is rowVals[i] + sparseVals[jc[col] + rowProgress[col]] = rowVals[i]; // Set value + ir[jc[col] + rowProgress[col]] = m; // Set row at which value occurs rowProgress[col]++; } } } - //finally, copy sparseVals into pr (and pi, if complex) + // finally, copy sparseVals into pr (and pi, if complex) fillMatlabArray(sparseVals, mxa, nnz); delete[] sparseVals; delete[] rowProgress; @@ -1065,19 +1006,16 @@ mxArray* saveDataToMatlab(RCP -mxArray* saveDataToMatlab(RCP>& data) -{ - //Precondition: Memory has already been allocated by MATLAB for the array. 
- int nr = data->getGlobalLength(); - int nc = data->getNumVectors(); +template <> +mxArray* saveDataToMatlab(RCP >& data) { + // Precondition: Memory has already been allocated by MATLAB for the array. + int nr = data->getGlobalLength(); + int nc = data->getNumVectors(); mxArray* output = createMatlabMultiVector(nr, nc); - double* array = (double*) malloc(sizeof(double) * nr * nc); - for(int col = 0; col < nc; col++) - { + double* array = (double*)malloc(sizeof(double) * nr * nc); + for (int col = 0; col < nc; col++) { Teuchos::ArrayRCP colData = data->getData(col); - for(int row = 0; row < nr; row++) - { + for (int row = 0; row < nr; row++) { array[col * nr + row] = colData[row]; } } @@ -1086,19 +1024,16 @@ mxArray* saveDataToMatlab(RCP -mxArray* saveDataToMatlab(RCP>& data) -{ - //Precondition: Memory has already been allocated by MATLAB for the array. - int nr = data->getGlobalLength(); - int nc = data->getNumVectors(); - mxArray* output = createMatlabMultiVector(nr, nc); - complex_t* array = (complex_t*) malloc(sizeof(complex_t) * nr * nc); - for(int col = 0; col < nc; col++) - { +template <> +mxArray* saveDataToMatlab(RCP >& data) { + // Precondition: Memory has already been allocated by MATLAB for the array. 
+ int nr = data->getGlobalLength(); + int nc = data->getNumVectors(); + mxArray* output = createMatlabMultiVector(nr, nc); + complex_t* array = (complex_t*)malloc(sizeof(complex_t) * nr * nc); + for (int col = 0; col < nc; col++) { Teuchos::ArrayRCP colData = data->getData(col); - for(int row = 0; row < nr; row++) - { + for (int row = 0; row < nr; row++) { array[col * nr + row] = colData[row]; } } @@ -1107,206 +1042,180 @@ mxArray* saveDataToMatlab(RCP -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { RCP xmat = EpetraCrs_To_XpetraMatrix(data); return saveDataToMatlab(xmat); } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { mxArray* output = mxCreateDoubleMatrix(data->GlobalLength(), data->NumVectors(), mxREAL); double* dataPtr = mxGetPr(output); data->ExtractCopy(dataPtr, data->GlobalLength()); return output; } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ - //Set up array of inputs for matlab constructAggregates +template <> +mxArray* saveDataToMatlab(RCP& data) { + // Set up array of inputs for matlab constructAggregates int numNodes = data->GetVertex2AggId()->getData(0).size(); - int numAggs = data->GetNumAggregates(); + int numAggs = data->GetNumAggregates(); mxArray* dataIn[5]; - mwSize singleton[] = {1, 1}; - dataIn[0] = mxCreateNumericArray(2, singleton, mxINT32_CLASS, mxREAL); - *((int*) mxGetData(dataIn[0])) = numNodes; - dataIn[1] = mxCreateNumericArray(2, singleton, mxINT32_CLASS, mxREAL); - *((int*) mxGetData(dataIn[1])) = numAggs; - mwSize nodeArrayDims[] = {(mwSize) numNodes, 1}; //dimensions for Nx1 array, where N is number of nodes (vert2Agg) - dataIn[2] = mxCreateNumericArray(2, nodeArrayDims, mxINT32_CLASS, mxREAL); - int* vtaid = (int*) mxGetData(dataIn[2]); + mwSize singleton[] = {1, 1}; + dataIn[0] = mxCreateNumericArray(2, singleton, mxINT32_CLASS, mxREAL); + *((int*)mxGetData(dataIn[0])) = numNodes; + dataIn[1] = 
mxCreateNumericArray(2, singleton, mxINT32_CLASS, mxREAL); + *((int*)mxGetData(dataIn[1])) = numAggs; + mwSize nodeArrayDims[] = {(mwSize)numNodes, 1}; // dimensions for Nx1 array, where N is number of nodes (vert2Agg) + dataIn[2] = mxCreateNumericArray(2, nodeArrayDims, mxINT32_CLASS, mxREAL); + int* vtaid = (int*)mxGetData(dataIn[2]); ArrayRCP vertexToAggID = data->GetVertex2AggId()->getData(0); - for(int i = 0; i < numNodes; i++) - { + for (int i = 0; i < numNodes; i++) { vtaid[i] = vertexToAggID[i]; } - mwSize aggArrayDims[] = {(mwSize) numAggs, 1}; //dims for Nx1 array, where N is number of aggregates (rootNodes, aggSizes) - dataIn[3] = mxCreateNumericArray(2, aggArrayDims, mxINT32_CLASS, mxREAL); - //First, find out if the aggregates even have 1 root node per aggregate. If not, assume roots are invalid and assign ourselves + mwSize aggArrayDims[] = {(mwSize)numAggs, 1}; // dims for Nx1 array, where N is number of aggregates (rootNodes, aggSizes) + dataIn[3] = mxCreateNumericArray(2, aggArrayDims, mxINT32_CLASS, mxREAL); + // First, find out if the aggregates even have 1 root node per aggregate. If not, assume roots are invalid and assign ourselves int totalRoots = 0; - for(int i = 0; i < numNodes; i++) - { - if(data->IsRoot(i)) + for (int i = 0; i < numNodes; i++) { + if (data->IsRoot(i)) totalRoots++; } bool reassignRoots = false; - if(totalRoots != numAggs) - { - cout << endl << "Warning: Number of root nodes and number of aggregates do not match." << endl; - cout << "Will reassign root nodes when writing aggregates to matlab." << endl << endl; + if (totalRoots != numAggs) { + cout << endl + << "Warning: Number of root nodes and number of aggregates do not match." << endl; + cout << "Will reassign root nodes when writing aggregates to matlab." 
<< endl + << endl; reassignRoots = true; } - int* rn = (int*) mxGetData(dataIn[3]); //list of root nodes (in no particular order) + int* rn = (int*)mxGetData(dataIn[3]); // list of root nodes (in no particular order) { - if(reassignRoots) - { - //For each aggregate, just pick the first node we see in it and set it as root - int lastFoundNode = 0; //heuristic for speed, a node in aggregate N+1 is likely to come very soon after a node in agg N - for(int i = 0; i < numAggs; i++) - { + if (reassignRoots) { + // For each aggregate, just pick the first node we see in it and set it as root + int lastFoundNode = 0; // heuristic for speed, a node in aggregate N+1 is likely to come very soon after a node in agg N + for (int i = 0; i < numAggs; i++) { rn[i] = -1; - for(int j = lastFoundNode; j < lastFoundNode + numNodes; j++) - { + for (int j = lastFoundNode; j < lastFoundNode + numNodes; j++) { int index = j % numNodes; - if(vertexToAggID[index] == i) - { - rn[i] = index; + if (vertexToAggID[index] == i) { + rn[i] = index; lastFoundNode = index; } } TEUCHOS_TEST_FOR_EXCEPTION(rn[i] == -1, runtime_error, "Invalid aggregates: Couldn't find any node in aggregate #" << i << "."); } - } - else - { - int i = 0; //iterates over aggregate IDs - for(int j = 0; j < numNodes; j++) - { - if(data->IsRoot(j)) - { - if(i == numAggs) + } else { + int i = 0; // iterates over aggregate IDs + for (int j = 0; j < numNodes; j++) { + if (data->IsRoot(j)) { + if (i == numAggs) throw runtime_error("Cannot store invalid aggregates in MATLAB - more root nodes than aggregates."); - rn[i] = j; //now we know this won't go out of bounds (rn's underlying matlab array is numAggs in length) + rn[i] = j; // now we know this won't go out of bounds (rn's underlying matlab array is numAggs in length) i++; } } - if(i + 1 < numAggs) + if (i + 1 < numAggs) throw runtime_error("Cannot store invalid aggregates in MATLAB - fewer root nodes than aggregates."); } } - dataIn[4] = mxCreateNumericArray(1, aggArrayDims, 
mxINT32_CLASS, mxREAL); - int* as = (int*) mxGetData(dataIn[4]); //list of aggregate sizes + dataIn[4] = mxCreateNumericArray(1, aggArrayDims, mxINT32_CLASS, mxREAL); + int* as = (int*)mxGetData(dataIn[4]); // list of aggregate sizes ArrayRCP aggSizes = data->ComputeAggregateSizes(); - for(int i = 0; i < numAggs; i++) - { + for (int i = 0; i < numAggs; i++) { as[i] = aggSizes[i]; } mxArray* matlabAggs[1]; int result = mexCallMATLAB(1, matlabAggs, 5, dataIn, "constructAggregates"); - if(result != 0) + if (result != 0) throw runtime_error("Matlab encountered an error while constructing aggregates struct."); return matlabAggs[0]; } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { throw runtime_error("AmalgamationInfo not supported in MueMex yet."); return mxCreateDoubleScalar(0); } -template<> -mxArray* saveDataToMatlab(RCP& data) -{ - int numEntries = (int) data->GetGlobalNumEdges(); - int numRows = (int) data->GetDomainMap()->getGlobalNumElements(); //assume numRows == numCols - mxArray* mat = mxCreateSparseLogicalMatrix(numRows, numRows, numEntries); - mxLogical* outData = (mxLogical*) mxGetData(mat); - mwIndex* rowInds = mxGetIr(mat); - mwIndex* colPtrs = mxGetJc(mat); +template <> +mxArray* saveDataToMatlab(RCP& data) { + int numEntries = (int)data->GetGlobalNumEdges(); + int numRows = (int)data->GetDomainMap()->getGlobalNumElements(); // assume numRows == numCols + mxArray* mat = mxCreateSparseLogicalMatrix(numRows, numRows, numEntries); + mxLogical* outData = (mxLogical*)mxGetData(mat); + mwIndex* rowInds = mxGetIr(mat); + mwIndex* colPtrs = mxGetJc(mat); mm_LocalOrd* dataCopy = new mm_LocalOrd[numEntries]; - mm_LocalOrd* iter = dataCopy; - int* entriesPerRow = new int[numRows]; - int* entriesPerCol = new int[numRows]; - for(int i = 0; i < numRows; i++) - { + mm_LocalOrd* iter = dataCopy; + int* entriesPerRow = new int[numRows]; + int* entriesPerCol = new int[numRows]; + for (int i = 0; i < numRows; i++) 
{ entriesPerRow[i] = 0; entriesPerCol[i] = 0; } - for(int i = 0; i < numRows; i++) - { - ArrayView neighbors = data->getNeighborVertices(i); //neighbors has the column indices for row i + for (int i = 0; i < numRows; i++) { + ArrayView neighbors = data->getNeighborVertices(i); // neighbors has the column indices for row i memcpy(iter, neighbors.getRawPtr(), sizeof(mm_LocalOrd) * neighbors.size()); entriesPerRow[i] = neighbors.size(); - for(int j = 0; j < neighbors.size(); j++) - { + for (int j = 0; j < neighbors.size(); j++) { entriesPerCol[neighbors[j]]++; } iter += neighbors.size(); } - mwIndex** rowIndsByColumn = new mwIndex*[numRows]; //rowIndsByColumn[0] points to array of row indices in column 1 + mwIndex** rowIndsByColumn = new mwIndex*[numRows]; // rowIndsByColumn[0] points to array of row indices in column 1 mxLogical** valuesByColumn = new mxLogical*[numRows]; - int* numEnteredPerCol = new int[numRows]; - int accum = 0; - for(int i = 0; i < numRows; i++) - { + int* numEnteredPerCol = new int[numRows]; + int accum = 0; + for (int i = 0; i < numRows; i++) { rowIndsByColumn[i] = &rowInds[accum]; - //cout << "Entries in column " << i << " start at offset " << accum << endl; + // cout << "Entries in column " << i << " start at offset " << accum << endl; valuesByColumn[i] = &outData[accum]; accum += entriesPerCol[i]; - if(accum > numEntries) + if (accum > numEntries) throw runtime_error("potato"); } - for(int i = 0; i < numRows; i++) - { - numEnteredPerCol[i] = 0; //rowIndsByColumn[n][numEnteredPerCol[n]] gives the next place to put a row index + for (int i = 0; i < numRows; i++) { + numEnteredPerCol[i] = 0; // rowIndsByColumn[n][numEnteredPerCol[n]] gives the next place to put a row index } - //entriesPerCol now has Jc information (col offsets) - accum = 0; //keep track of cumulative index in dataCopy - for(int row = 0; row < numRows; row++) - { - for(int entryInRow = 0; entryInRow < entriesPerRow[row]; entryInRow++) - { + // entriesPerCol now has Jc 
information (col offsets) + accum = 0; // keep track of cumulative index in dataCopy + for (int row = 0; row < numRows; row++) { + for (int entryInRow = 0; entryInRow < entriesPerRow[row]; entryInRow++) { int col = dataCopy[accum]; accum++; rowIndsByColumn[col][numEnteredPerCol[col]] = row; - valuesByColumn[col][numEnteredPerCol[col]] = (mxLogical) 1; + valuesByColumn[col][numEnteredPerCol[col]] = (mxLogical)1; numEnteredPerCol[col]++; } } - accum = 0; //keep track of total entries over all columns - for(int col = 0; col < numRows; col++) - { + accum = 0; // keep track of total entries over all columns + for (int col = 0; col < numRows; col++) { colPtrs[col] = accum; accum += entriesPerCol[col]; } - colPtrs[numRows] = accum; //the last entry in jc, which is equivalent to numEntries + colPtrs[numRows] = accum; // the last entry in jc, which is equivalent to numEntries delete[] numEnteredPerCol; delete[] rowIndsByColumn; delete[] valuesByColumn; delete[] dataCopy; delete[] entriesPerRow; delete[] entriesPerCol; - //Construct list of boundary nodes + // Construct list of boundary nodes const ArrayRCP boundaryFlags = data->GetBoundaryNodeMap(); - int numBoundaryNodes = 0; - for(int i = 0; i < boundaryFlags.size(); i++) - { - if(boundaryFlags[i]) + int numBoundaryNodes = 0; + for (int i = 0; i < boundaryFlags.size(); i++) { + if (boundaryFlags[i]) numBoundaryNodes++; } cout << "Graph has " << numBoundaryNodes << " Dirichlet boundary nodes." 
<< endl; - mwSize dims[] = {(mwSize) numBoundaryNodes, 1}; + mwSize dims[] = {(mwSize)numBoundaryNodes, 1}; mxArray* boundaryList = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); - int* dest = (int*) mxGetData(boundaryList); - int* destIter = dest; - for(int i = 0; i < boundaryFlags.size(); i++) - { - if(boundaryFlags[i]) - { + int* dest = (int*)mxGetData(boundaryList); + int* destIter = dest; + for (int i = 0; i < boundaryFlags.size(); i++) { + if (boundaryFlags[i]) { *destIter = i; destIter++; } @@ -1318,63 +1227,57 @@ mxArray* saveDataToMatlab(RCP& data) } #ifdef HAVE_MUELU_INTREPID2 -template<> -mxArray* saveDataToMatlab(RCP& data) -{ +template <> +mxArray* saveDataToMatlab(RCP& data) { int rank = data->rank(); // NOTE: Only supports rank 2 arrays - if(rank!=2) + if (rank != 2) throw std::runtime_error("Error: Only rank two FieldContainers are supported."); int nr = data->extent(0); int nc = data->extent(1); - mwSize dims[]={(mwSize)nr,(mwSize)nc}; - mxArray* mxa = mxCreateNumericArray(2,dims, mxINT32_CLASS, mxREAL); - int *array = (int*) mxGetData(mxa); + mwSize dims[] = {(mwSize)nr, (mwSize)nc}; + mxArray* mxa = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); + int* array = (int*)mxGetData(mxa); - for(int col = 0; col < nc; col++) - { - for(int row = 0; row < nr; row++) - { - array[col * nr + row] = (*data)(row,col); + for (int col = 0; col < nc; col++) { + for (int row = 0; row < nr; row++) { + array[col * nr + row] = (*data)(row, col); } } return mxa; } #endif - -template -MuemexData::MuemexData(const mxArray* mxa) : MuemexArg(getMuemexType()) -{ +template +MuemexData::MuemexData(const mxArray* mxa) + : MuemexArg(getMuemexType()) { data = loadDataFromMatlab(mxa); } -template -mxArray* MuemexData::convertToMatlab() -{ +template +mxArray* MuemexData::convertToMatlab() { return saveDataToMatlab(data); } -template -MuemexData::MuemexData(T& dataToCopy, MuemexType dataType) : MuemexArg(dataType) -{ +template +MuemexData::MuemexData(T& dataToCopy, 
MuemexType dataType) + : MuemexArg(dataType) { data = dataToCopy; } -template -MuemexData::MuemexData(T& dataToCopy) : MuemexData(dataToCopy, getMuemexType(dataToCopy)) {} +template +MuemexData::MuemexData(T& dataToCopy) + : MuemexData(dataToCopy, getMuemexType(dataToCopy)) {} -template -T& MuemexData::getData() -{ +template +T& MuemexData::getData() { return data; } -template -void MuemexData::setData(T& newData) -{ +template +void MuemexData::setData(T& newData) { this->data = newData; } @@ -1382,150 +1285,107 @@ void MuemexData::setData(T& newData) /* More Template Functions */ /* ***************************** */ -template -void addLevelVariable(const T& data, std::string& name, Level& lvl, const Factory * fact) -{ +template +void addLevelVariable(const T& data, std::string& name, Level& lvl, const Factory* fact) { lvl.AddKeepFlag(name, fact, MueLu::UserData); lvl.Set(name, data, fact); } -template -const T& getLevelVariable(std::string& name, Level& lvl) -{ - try - { +template +const T& getLevelVariable(std::string& name, Level& lvl) { + try { return lvl.Get(name); - } - catch(std::exception& e) - { + } catch (std::exception& e) { throw std::runtime_error("Requested custom variable " + name + " is not in the level."); } } -//Functions used to put data through matlab factories - first arg is "this" pointer of matlab factory -template -std::vector> processNeeds(const Factory* factory, std::string& needsParam, Level& lvl) -{ +// Functions used to put data through matlab factories - first arg is "this" pointer of matlab factory +template +std::vector > processNeeds(const Factory* factory, std::string& needsParam, Level& lvl) { using namespace std; using namespace Teuchos; - typedef RCP> Matrix_t; - typedef RCP> MultiVector_t; - typedef RCP> Aggregates_t; - typedef RCP> AmalgamationInfo_t; + typedef RCP > Matrix_t; + typedef RCP > MultiVector_t; + typedef RCP > Aggregates_t; + typedef RCP > AmalgamationInfo_t; typedef RCP Graph_t; vector needsList = 
tokenizeList(needsParam); - vector> args; - for(size_t i = 0; i < needsList.size(); i++) - { - if(needsList[i] == "A" || needsList[i] == "P" || needsList[i] == "R" || needsList[i]=="Ptent") - { + vector > args; + for (size_t i = 0; i < needsList.size(); i++) { + if (needsList[i] == "A" || needsList[i] == "P" || needsList[i] == "R" || needsList[i] == "Ptent") { Matrix_t mydata = lvl.Get(needsList[i], factory->GetFactory(needsList[i]).get()); args.push_back(rcp(new MuemexData(mydata))); - } - else if(needsList[i] == "Nullspace" || needsList[i] == "Coordinates") - { + } else if (needsList[i] == "Nullspace" || needsList[i] == "Coordinates") { MultiVector_t mydata = lvl.Get(needsList[i], factory->GetFactory(needsList[i]).get()); args.push_back(rcp(new MuemexData(mydata))); - } - else if(needsList[i] == "Aggregates") - { + } else if (needsList[i] == "Aggregates") { Aggregates_t mydata = lvl.Get(needsList[i], factory->GetFactory(needsList[i]).get()); args.push_back(rcp(new MuemexData(mydata))); - } - else if(needsList[i] == "UnAmalgamationInfo") - { + } else if (needsList[i] == "UnAmalgamationInfo") { AmalgamationInfo_t mydata = lvl.Get(needsList[i], factory->GetFactory(needsList[i]).get()); args.push_back(rcp(new MuemexData(mydata))); - } - else if(needsList[i] == "Level") - { + } else if (needsList[i] == "Level") { int levelNum = lvl.GetLevelID(); args.push_back(rcp(new MuemexData(levelNum))); - } - else if(needsList[i] == "Graph") - { + } else if (needsList[i] == "Graph") { Graph_t mydata = lvl.Get(needsList[i], factory->GetFactory(needsList[i]).get()); args.push_back(rcp(new MuemexData(mydata))); - } - else - { + } else { vector words; string badNameMsg = "Custom Muemex variables in \"Needs\" list require a type and a name, e.g. \"double myVal\". \n Leading and trailing spaces are OK. 
\n Don't know how to handle \"" + needsList[i] + "\".\n"; - //compare type without case sensitivity - char* buf = (char*) malloc(needsList[i].size() + 1); + // compare type without case sensitivity + char* buf = (char*)malloc(needsList[i].size() + 1); strcpy(buf, needsList[i].c_str()); - for(char* iter = buf; *iter != ' '; iter++) - { - if(*iter == 0) - { + for (char* iter = buf; *iter != ' '; iter++) { + if (*iter == 0) { free(buf); throw runtime_error(badNameMsg); } - *iter = (char) tolower(*iter); + *iter = (char)tolower(*iter); } const char* wordDelim = " "; - char* mark = strtok(buf, wordDelim); - while(mark != NULL) - { + char* mark = strtok(buf, wordDelim); + while (mark != NULL) { string wordStr(mark); words.push_back(wordStr); mark = strtok(NULL, wordDelim); } - if(words.size() != 2) - { + if (words.size() != 2) { free(buf); throw runtime_error(badNameMsg); } - char* typeStr = (char*) words[0].c_str(); - if(strstr(typeStr, "ordinalvector")) - { - typedef RCP> LOVector_t; + char* typeStr = (char*)words[0].c_str(); + if (strstr(typeStr, "ordinalvector")) { + typedef RCP > LOVector_t; LOVector_t mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "map")) - { - typedef RCP> Map_t; + } else if (strstr(typeStr, "map")) { + typedef RCP > Map_t; Map_t mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "scalar")) - { + } else if (strstr(typeStr, "scalar")) { Scalar mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "double")) - { + } else if (strstr(typeStr, "double")) { double mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "complex")) - { + } else if (strstr(typeStr, "complex")) { complex_t mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - 
else if(strstr(typeStr, "matrix")) - { + } else if (strstr(typeStr, "matrix")) { Matrix_t mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "multivector")) - { + } else if (strstr(typeStr, "multivector")) { MultiVector_t mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "int")) - { + } else if (strstr(typeStr, "int")) { int mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else if(strstr(typeStr, "string")) - { + } else if (strstr(typeStr, "string")) { string mydata = getLevelVariable(needsList[i], lvl); args.push_back(rcp(new MuemexData(mydata))); - } - else - { + } else { free(buf); throw std::runtime_error(words[0] + " is not a known variable type."); } @@ -1535,128 +1395,90 @@ std::vector> processNeeds(const Factory* factory, std::s return args; } -template -void processProvides(std::vector>& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl) -{ +template +void processProvides(std::vector >& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl) { using namespace std; using namespace Teuchos; - typedef RCP> Matrix_t; - typedef RCP> MultiVector_t; - typedef RCP> Aggregates_t; - typedef RCP> AmalgamationInfo_t; + typedef RCP > Matrix_t; + typedef RCP > MultiVector_t; + typedef RCP > Aggregates_t; + typedef RCP > AmalgamationInfo_t; typedef RCP Graph_t; vector provides = tokenizeList(providesParam); - for(size_t i = 0; i < size_t(provides.size()); i++) - { - if(provides[i] == "A" || provides[i] == "P" || provides[i] == "R" || provides[i]=="Ptent") - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + for (size_t i = 0; i < size_t(provides.size()); i++) { + if (provides[i] == "A" || provides[i] == "P" || provides[i] == "R" || provides[i] == "Ptent") { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); 
lvl.Set(provides[i], mydata->getData(), factory); - } - else if(provides[i] == "Nullspace" || provides[i] == "Coordinates") - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (provides[i] == "Nullspace" || provides[i] == "Coordinates") { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); lvl.Set(provides[i], mydata->getData(), factory); - } - else if(provides[i] == "Aggregates") - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (provides[i] == "Aggregates") { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); lvl.Set(provides[i], mydata->getData(), factory); - } - else if(provides[i] == "UnAmalgamationInfo") - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (provides[i] == "UnAmalgamationInfo") { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); lvl.Set(provides[i], mydata->getData(), factory); - } - else if(provides[i] == "Graph") - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (provides[i] == "Graph") { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); lvl.Set(provides[i], mydata->getData(), factory); - } - else - { + } else { vector words; string badNameMsg = "Custom Muemex variables in \"Provides\" list require a type and a name, e.g. \"double myVal\". \n Leading and trailing spaces are OK. 
\n Don't know how to handle \"" + provides[i] + "\".\n"; - //compare type without case sensitivity - char* buf = (char*) malloc(provides[i].size() + 1); + // compare type without case sensitivity + char* buf = (char*)malloc(provides[i].size() + 1); strcpy(buf, provides[i].c_str()); - for(char* iter = buf; *iter != ' '; iter++) - { - if(*iter == 0) - { + for (char* iter = buf; *iter != ' '; iter++) { + if (*iter == 0) { free(buf); throw runtime_error(badNameMsg); } - *iter = (char) tolower(*iter); + *iter = (char)tolower(*iter); } const char* wordDelim = " "; - char* mark = strtok(buf, wordDelim); - while(mark != NULL) - { + char* mark = strtok(buf, wordDelim); + while (mark != NULL) { string wordStr(mark); words.push_back(wordStr); mark = strtok(NULL, wordDelim); } - if(words.size() != 2) - { + if (words.size() != 2) { free(buf); throw runtime_error(badNameMsg); } const char* typeStr = words[0].c_str(); - if(strstr(typeStr, "ordinalvector")) - { - typedef RCP> LOVector_t; - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + if (strstr(typeStr, "ordinalvector")) { + typedef RCP > LOVector_t; + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "map")) - { - typedef RCP> Map_t; - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "map")) { + typedef RCP > Map_t; + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "scalar")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "scalar")) { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "double")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "double")) { + RCP > mydata = Teuchos::rcp_static_cast 
>(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "complex")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "complex")) { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "matrix")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "matrix")) { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "multivector")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "multivector")) { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "int")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "int")) { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "bool")) - { + } else if (strstr(typeStr, "bool")) { RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else if(strstr(typeStr, "string")) - { - RCP> mydata = Teuchos::rcp_static_cast>(mexOutput[i]); + } else if (strstr(typeStr, "string")) { + RCP > mydata = Teuchos::rcp_static_cast >(mexOutput[i]); addLevelVariable(mydata->getData(), words[1], lvl, factory); - } - else - { + } else { free(buf); throw std::runtime_error(words[0] + " is not a known variable type."); } @@ -1667,27 +1489,26 @@ void processProvides(std::vector>& mexOutput, const Fact // Throwable Stubs for long long -template<> -std::vector> processNeeds(const Factory* factory, std::string& needsParam, Level& lvl) { +template <> +std::vector > processNeeds(const Factory* factory, 
std::string& needsParam, Level& lvl) { throw std::runtime_error("Muemex does not support long long for global indices"); } -template<> -std::vector> processNeeds(const Factory* factory, std::string& needsParam, Level& lvl) { +template <> +std::vector > processNeeds(const Factory* factory, std::string& needsParam, Level& lvl) { throw std::runtime_error("Muemex does not support long long for global indices"); } -template<> -void processProvides(std::vector>& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl) { +template <> +void processProvides(std::vector >& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl) { throw std::runtime_error("Muemex does not support long long for global indices"); } -template<> -void processProvides(std::vector>& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl) { +template <> +void processProvides(std::vector >& mexOutput, const Factory* factory, std::string& providesParam, Level& lvl) { throw std::runtime_error("Muemex does not support long long for global indices"); } - -}// end namespace -#endif //HAVE_MUELU_MATLAB error handler -#endif //MUELU_MATLABUTILS_DEF_HPP guard +} // namespace MueLu +#endif // HAVE_MUELU_MATLAB error handler +#endif // MUELU_MATLABUTILS_DEF_HPP guard diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory.cpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory.cpp index 338992b88dec..36fbf7a0e607 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory.cpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory.cpp @@ -44,7 +44,6 @@ // // @HEADER - #include "MueLu_ExplicitInstantiation.hpp" #include "MueLu_SingleLevelMatlabFactory_def.hpp" @@ -52,8 +51,8 @@ #include "TpetraCore_ETIHelperMacros.h" #ifdef HAVE_MUELU_MATLAB -#define MUELU_LOCAL_INSTANT(S,LO,GO,N) \ - template class MueLu::SingleLevelMatlabFactory; +#define MUELU_LOCAL_INSTANT(S, LO, GO, N) \ + template class 
MueLu::SingleLevelMatlabFactory; TPETRA_ETI_MANGLING_TYPEDEFS() diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp index 6e6298bf8284..fb0be250c6e5 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp @@ -65,65 +65,64 @@ #ifdef HAVE_MUELU_MATLAB #include "mex.h" - namespace MueLu { - /*! - @class SingleLevelMatlabFactory - @ingroup MueMexClasses - @brief Factory for interacting with Matlab - */ - template - class SingleLevelMatlabFactory : public SingleLevelFactoryBase { +/*! + @class SingleLevelMatlabFactory + @ingroup MueMexClasses + @brief Factory for interacting with Matlab +*/ +template +class SingleLevelMatlabFactory : public SingleLevelFactoryBase { #undef MUELU_SINGLELEVELMATLABFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - SingleLevelMatlabFactory(); + SingleLevelMatlabFactory(); - virtual ~SingleLevelMatlabFactory() { } + virtual ~SingleLevelMatlabFactory() {} - //@} + //@} - //! @name Input - //@{ - RCP GetValidParameterList() const; + //! @name Input + //@{ + RCP GetValidParameterList() const; - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level& currentLevel) const; - //@} + //! @name Build methods. + //@{ + void Build(Level& currentLevel) const; + //@} - //! @ name Description - //@{ - std::string description() const; - //@} + //! @ name Description + //@{ + std::string description() const; + //@} - private: - //@{ + private: + //@{ - mutable bool hasDeclaredInput_; + mutable bool hasDeclaredInput_; - //@} + //@} - //@{ + //@{ - //! List of arguments to the MATLAB function, in order. These args must correspond to MueLu "Needs" objects. 
- mutable std::vector needs_; + //! List of arguments to the MATLAB function, in order. These args must correspond to MueLu "Needs" objects. + mutable std::vector needs_; - //@} + //@} - }; //class SingleLevelMatlabFactory +}; // class SingleLevelMatlabFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_SINGLELEVELMATLABFACTORY_SHORT -#endif // HAVE_MUELU_MATLAB -#endif // MUELU SINGLELEVELMATLABFACTORY_DECL_HPP +#endif // HAVE_MUELU_MATLAB +#endif // MUELU SINGLELEVELMATLABFACTORY_DECL_HPP diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp index fe1a943c8688..849e059ed673 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp @@ -54,76 +54,68 @@ #include "MueLu_SingleLevelMatlabFactory_decl.hpp" #include "MueLu_MatlabUtils_decl.hpp" - #ifdef HAVE_MUELU_MATLAB #include "mex.h" namespace MueLu { - template - SingleLevelMatlabFactory::SingleLevelMatlabFactory() - : hasDeclaredInput_(false) { } +template +SingleLevelMatlabFactory::SingleLevelMatlabFactory() + : hasDeclaredInput_(false) {} - template - RCP SingleLevelMatlabFactory::GetValidParameterList() const - { - RCP validParamList = getInputParamList(); - validParamList->set("Provides" , "" ,"A comma-separated list of objects provided by the SingleLevelMatlabFactory"); - validParamList->set("Needs" , "", "A comma-separated list of objects needed by the SingleLevelMatlabFactory"); - validParamList->set("Function" , "" , "The name of the Matlab MEX function to call for Build()"); - return validParamList; - } +template +RCP SingleLevelMatlabFactory::GetValidParameterList() const { + RCP validParamList = getInputParamList(); + validParamList->set("Provides", "", "A comma-separated list of objects provided by the SingleLevelMatlabFactory"); + validParamList->set("Needs", "", "A comma-separated list of objects needed by 
the SingleLevelMatlabFactory"); + validParamList->set("Function", "", "The name of the Matlab MEX function to call for Build()"); + return validParamList; +} - template - void SingleLevelMatlabFactory::DeclareInput(Level ¤tLevel) const - { - const Teuchos::ParameterList& pL = GetParameterList(); - needs_ = tokenizeList(pL.get("Needs")); - // Declare inputs - for(size_t i = 0; i < needs_.size(); i++) - { - if(!IsParamMuemexVariable(needs_[i]) && needs_[i] != "Level") - this->Input(currentLevel, needs_[i]); - } - hasDeclaredInput_ = true; +template +void SingleLevelMatlabFactory::DeclareInput(Level& currentLevel) const { + const Teuchos::ParameterList& pL = GetParameterList(); + needs_ = tokenizeList(pL.get("Needs")); + // Declare inputs + for (size_t i = 0; i < needs_.size(); i++) { + if (!IsParamMuemexVariable(needs_[i]) && needs_[i] != "Level") + this->Input(currentLevel, needs_[i]); } + hasDeclaredInput_ = true; +} - template - void SingleLevelMatlabFactory::Build(Level& currentLevel) const - { - FactoryMonitor m(*this, "Build", currentLevel); +template +void SingleLevelMatlabFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - const Teuchos::ParameterList& pL = GetParameterList(); - using Teuchos::rcp; - using Teuchos::rcp; - using namespace std; - // NOTE: mexOutput[0] is the "Provides." 
Might want to modify to allow for additional outputs - string needsList = pL.get("Needs"); - vector> InputArgs = processNeeds(this, needsList, currentLevel); - string providesList = pL.get("Provides"); - size_t numProvides = tokenizeList(providesList).size(); - // Call mex function - string matlabFunction = pL.get("Function"); - if(!matlabFunction.length()) - throw std::runtime_error("Invalid matlab function name"); - vector > mexOutput = callMatlab(matlabFunction, numProvides, InputArgs); - // Set output in level - processProvides(mexOutput, this, providesList, currentLevel); - } - - template - std::string SingleLevelMatlabFactory::description() const { - std::ostringstream out; - const Teuchos::ParameterList& pL = GetParameterList(); - out << "SingleLevelMatlabFactory["<("Function")<<"]"; - return out.str(); - } + const Teuchos::ParameterList& pL = GetParameterList(); + using Teuchos::rcp; + using namespace std; + // NOTE: mexOutput[0] is the "Provides." Might want to modify to allow for additional outputs + string needsList = pL.get("Needs"); + vector> InputArgs = processNeeds(this, needsList, currentLevel); + string providesList = pL.get("Provides"); + size_t numProvides = tokenizeList(providesList).size(); + // Call mex function + string matlabFunction = pL.get("Function"); + if (!matlabFunction.length()) + throw std::runtime_error("Invalid matlab function name"); + vector> mexOutput = callMatlab(matlabFunction, numProvides, InputArgs); + // Set output in level + processProvides(mexOutput, this, providesList, currentLevel); +} +template +std::string SingleLevelMatlabFactory::description() const { + std::ostringstream out; + const Teuchos::ParameterList& pL = GetParameterList(); + out << "SingleLevelMatlabFactory[" << pL.get("Function") << "]"; + return out.str(); +} -} //namespace MueLu +} // namespace MueLu #define MUELU_SINGLELEVELMATLABFACTORY_SHORT -#endif // HAVE_MUELU_MATLAB - -#endif // MUELU_SINGLELEVELMATLABFACTORY_DEF_HPP +#endif // HAVE_MUELU_MATLAB 
+#endif // MUELU_SINGLELEVELMATLABFACTORY_DEF_HPP diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory.cpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory.cpp index 6e3e836d0203..38249d1da71e 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory.cpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory.cpp @@ -44,7 +44,6 @@ // // @HEADER - #include "MueLu_ExplicitInstantiation.hpp" #include "MueLu_TwoLevelMatlabFactory_def.hpp" @@ -52,8 +51,8 @@ #include "TpetraCore_ETIHelperMacros.h" #ifdef HAVE_MUELU_MATLAB -#define MUELU_LOCAL_INSTANT(S,LO,GO,N) \ - template class MueLu::TwoLevelMatlabFactory; +#define MUELU_LOCAL_INSTANT(S, LO, GO, N) \ + template class MueLu::TwoLevelMatlabFactory; TPETRA_ETI_MANGLING_TYPEDEFS() diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp index dde2b72a60a8..2bf2e3a9df98 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp @@ -66,67 +66,66 @@ #ifdef HAVE_MUELU_MATLAB #include "mex.h" - namespace MueLu { - /*! - @class TwoLevelMatlabFactory - @ingroup MueMexClasses - @brief Factory for interacting with Matlab - */ - template - class TwoLevelMatlabFactory : public TwoLevelFactoryBase { +/*! + @class TwoLevelMatlabFactory + @ingroup MueMexClasses + @brief Factory for interacting with Matlab +*/ +template +class TwoLevelMatlabFactory : public TwoLevelFactoryBase { #undef MUELU_TWOLEVELMATLABFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - TwoLevelMatlabFactory(); + TwoLevelMatlabFactory(); - virtual ~TwoLevelMatlabFactory() { } + virtual ~TwoLevelMatlabFactory() {} - //@} + //@} - //! @name Input - //@{ - RCP GetValidParameterList() const; + //! 
@name Input + //@{ + RCP GetValidParameterList() const; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level& fineLevel, Level& coarseLevel) const; - //@} + //! @name Build methods. + //@{ + void Build(Level& fineLevel, Level& coarseLevel) const; + //@} - //! @ name Description - //@{ - std::string description() const; - //@} - private: - //@{ + //! @ name Description + //@{ + std::string description() const; + //@} + private: + //@{ - mutable bool hasDeclaredInput_; + mutable bool hasDeclaredInput_; - //@} + //@} - //@{ + //@{ - //! List of arguments to the MATLAB function, in order. These args must correspond to MueLu "Needs" objects for the fine level. These must be listed before coarse needs. - mutable std::vector needsFine_; + //! List of arguments to the MATLAB function, in order. These args must correspond to MueLu "Needs" objects for the fine level. These must be listed before coarse needs. + mutable std::vector needsFine_; - //! List of arguments to the MATLAB function, in order. These args must correspond to MueLu "Needs" objects for the coarse level. These must be listed after fine needs. - mutable std::vector needsCoarse_; + //! List of arguments to the MATLAB function, in order. These args must correspond to MueLu "Needs" objects for the coarse level. These must be listed after fine needs. 
+ mutable std::vector needsCoarse_; - //@} + //@} - }; //class TwoLevelMatlabFactory +}; // class TwoLevelMatlabFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_TWOLEVELMATLABFACTORY_SHORT -#endif // HAVE_MUELU_MATLAB -#endif // MUELU TWOLEVELMATLABFACTORY_DECL_HPP +#endif // HAVE_MUELU_MATLAB +#endif // MUELU TWOLEVELMATLABFACTORY_DECL_HPP diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp index ce98877e7bc8..d221e0cd6dcf 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp @@ -56,88 +56,83 @@ #include - #ifdef HAVE_MUELU_MATLAB #include "mex.h" namespace MueLu { - template - TwoLevelMatlabFactory::TwoLevelMatlabFactory() - : hasDeclaredInput_(false) { } - - template - RCP TwoLevelMatlabFactory::GetValidParameterList() const { - RCP validParamList = getInputParamList(); - validParamList->set("Provides" , "" ,"A comma-separated list of objects provided on the coarse level by the TwoLevelMatlabFactory"); - validParamList->set("Needs Fine" , "", "A comma-separated list of objects needed on the fine level by the TwoLevelMatlabFactory"); - validParamList->set("Needs Coarse" , "", "A comma-separated list of objects needed on the coarse level by the TwoLevelMatlabFactory"); - validParamList->set("Function" , "" , "The name of the Matlab MEX function to call for Build()"); - return validParamList; - } - - template - void TwoLevelMatlabFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - using namespace std; - const Teuchos::ParameterList& pL = GetParameterList(); - // Get needs strings - const std::string str_nf = pL.get("Needs Fine"); - const std::string str_nc = pL.get("Needs Coarse"); - needsFine_ = tokenizeList(str_nf); - needsCoarse_ = tokenizeList(str_nc); - for(auto fineNeed : needsFine_) - { - if(!IsParamMuemexVariable(fineNeed) && fineNeed != "Level") - 
this->Input(fineLevel, fineNeed); - } - for(auto coarseNeed : needsCoarse_) - { - if(!IsParamMuemexVariable(coarseNeed) && coarseNeed != "Level") - this->Input(coarseLevel, coarseNeed); - } - hasDeclaredInput_ = true; - } - - template - void TwoLevelMatlabFactory::Build(Level& fineLevel, Level& coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - - const Teuchos::ParameterList& pL = GetParameterList(); - using Teuchos::rcp; - using Teuchos::RCP; - using namespace std; - string needsFine = pL.get("Needs Fine"); - string needsCoarse = pL.get("Needs Coarse"); - vector> InputArgs = processNeeds(this, needsFine, fineLevel); - vector> InputArgsCoarse = processNeeds(this, needsCoarse, coarseLevel); - //Add coarse args to the end of InputArgs - InputArgs.reserve(InputArgs.size() + InputArgsCoarse.size()); - InputArgs.insert(InputArgs.begin(), InputArgsCoarse.begin(), InputArgsCoarse.end()); - - // Determine output - string provides = pL.get("Provides"); - size_t numProvides = tokenizeList(provides).size(); - // Call mex function - string matlabFunction = pL.get("Function"); - if(!matlabFunction.length()) - throw runtime_error("Invalid matlab function name"); - vector> mexOutput = callMatlab(matlabFunction, numProvides, InputArgs); - processProvides(mexOutput, this, provides, coarseLevel); +template +TwoLevelMatlabFactory::TwoLevelMatlabFactory() + : hasDeclaredInput_(false) {} + +template +RCP TwoLevelMatlabFactory::GetValidParameterList() const { + RCP validParamList = getInputParamList(); + validParamList->set("Provides", "", "A comma-separated list of objects provided on the coarse level by the TwoLevelMatlabFactory"); + validParamList->set("Needs Fine", "", "A comma-separated list of objects needed on the fine level by the TwoLevelMatlabFactory"); + validParamList->set("Needs Coarse", "", "A comma-separated list of objects needed on the coarse level by the TwoLevelMatlabFactory"); + validParamList->set("Function", "", "The name of the Matlab MEX 
function to call for Build()"); + return validParamList; +} + +template +void TwoLevelMatlabFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + using namespace std; + const Teuchos::ParameterList& pL = GetParameterList(); + // Get needs strings + const std::string str_nf = pL.get("Needs Fine"); + const std::string str_nc = pL.get("Needs Coarse"); + needsFine_ = tokenizeList(str_nf); + needsCoarse_ = tokenizeList(str_nc); + for (auto fineNeed : needsFine_) { + if (!IsParamMuemexVariable(fineNeed) && fineNeed != "Level") + this->Input(fineLevel, fineNeed); } - - template - std::string TwoLevelMatlabFactory::description() const { - std::ostringstream out; - const Teuchos::ParameterList& pL = GetParameterList(); - out << "TwoLevelMatlabFactory["<("Function")<<"]"; - return out.str(); + for (auto coarseNeed : needsCoarse_) { + if (!IsParamMuemexVariable(coarseNeed) && coarseNeed != "Level") + this->Input(coarseLevel, coarseNeed); } - - -} //namespace MueLu + hasDeclaredInput_ = true; +} + +template +void TwoLevelMatlabFactory::Build(Level& fineLevel, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + + const Teuchos::ParameterList& pL = GetParameterList(); + using Teuchos::rcp; + using Teuchos::RCP; + using namespace std; + string needsFine = pL.get("Needs Fine"); + string needsCoarse = pL.get("Needs Coarse"); + vector> InputArgs = processNeeds(this, needsFine, fineLevel); + vector> InputArgsCoarse = processNeeds(this, needsCoarse, coarseLevel); + // Add coarse args to the end of InputArgs + InputArgs.reserve(InputArgs.size() + InputArgsCoarse.size()); + InputArgs.insert(InputArgs.begin(), InputArgsCoarse.begin(), InputArgsCoarse.end()); + + // Determine output + string provides = pL.get("Provides"); + size_t numProvides = tokenizeList(provides).size(); + // Call mex function + string matlabFunction = pL.get("Function"); + if (!matlabFunction.length()) + throw runtime_error("Invalid matlab function name"); + vector> 
mexOutput = callMatlab(matlabFunction, numProvides, InputArgs); + processProvides(mexOutput, this, provides, coarseLevel); +} + +template +std::string TwoLevelMatlabFactory::description() const { + std::ostringstream out; + const Teuchos::ParameterList& pL = GetParameterList(); + out << "TwoLevelMatlabFactory[" << pL.get("Function") << "]"; + return out.str(); +} + +} // namespace MueLu #define MUELU_TWOLEVELMATLABFACTORY_SHORT -#endif // HAVE_MUELU_MATLAB - -#endif // MUELU_TWOLEVELMATLABFACTORY_DEF_HPP +#endif // HAVE_MUELU_MATLAB +#endif // MUELU_TWOLEVELMATLABFACTORY_DEF_HPP diff --git a/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp b/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp index 4b67209a05a2..4bd682c4844a 100644 --- a/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp +++ b/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp @@ -4,137 +4,131 @@ #include #include - namespace MueLu { - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::rcp_dynamic_cast; - - template - struct IOhelpers { - - static - Teuchos::RCP > - Read(const std::string& filename, - const RCP > rowMap, - RCP > colMap, - const RCP > domainMap = Teuchos::null, - const RCP > rangeMap = Teuchos::null, - const bool callFillComplete = true, - const bool binary = false, - const bool readLocal = false) { - using IO = Xpetra::IO; - Teuchos::RCP > A; - if (readLocal) - A = IO::ReadLocal(filename, rowMap, colMap, domainMap, rangeMap, callFillComplete, binary); - else - A = IO::Read(filename, rowMap, colMap, domainMap, rangeMap, callFillComplete, binary); - return A; +using Teuchos::RCP; +using Teuchos::rcp; +using Teuchos::rcp_dynamic_cast; + +template +struct IOhelpers { + static Teuchos::RCP > + Read(const std::string& filename, + const RCP > rowMap, + RCP > colMap, + const RCP > domainMap = Teuchos::null, + const RCP > rangeMap = Teuchos::null, + const bool callFillComplete = true, + const bool binary = false, + const bool readLocal = false) { + using IO = Xpetra::IO; + 
Teuchos::RCP > A; + if (readLocal) + A = IO::ReadLocal(filename, rowMap, colMap, domainMap, rangeMap, callFillComplete, binary); + else + A = IO::Read(filename, rowMap, colMap, domainMap, rangeMap, callFillComplete, binary); + return A; + } + + static Teuchos::RCP > + Read(std::string& filename, + RCP >& comm) { + Teuchos::ParameterList hierarchicalParams; + Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(&hierarchicalParams), *comm); + auto op = Read(hierarchicalParams, comm); + return op; + } + + static Teuchos::RCP > + Read(Teuchos::ParameterList& hierarchicalParams, + RCP >& comm) { + using HOp = Xpetra::HierarchicalOperator; + using blocked_matrix_type = typename HOp::blocked_matrix_type; + using blocked_map_type = typename blocked_matrix_type::blocked_map_type; + using matrix_type = typename HOp::matrix_type; + using map_type = typename HOp::map_type; + using lo_vec_type = typename blocked_map_type::lo_vec_type; + + auto lib = Xpetra::UseTpetra; + RCP op; + RCP map, near_colmap, clusterCoeffMap, ghosted_clusterCoeffMap, clusterMap, ghosted_clusterMap; + RCP nearField, basisMatrix, kernelApproximations, kernelBlockGraph; + + std::vector > transferMatrices; + RCP clusterSizes; + RCP blockedClusterMap, ghosted_blockedClusterMap; + RCP blockKernelApproximations; + + const bool readBinary = hierarchicalParams.get("read binary", false); + const bool readLocal = hierarchicalParams.get("read local", false); + + using IO = Xpetra::IO; + + // row, domain and range map of the operator + map = IO::ReadMap(hierarchicalParams.get("map"), lib, comm, readBinary); + // colmap of near field + near_colmap = IO::ReadMap(hierarchicalParams.get("near colmap"), lib, comm, readBinary); + if (hierarchicalParams.isType("coefficient map")) { + // 1-to-1 map for the cluster coefficients + clusterCoeffMap = IO::ReadMap(hierarchicalParams.get("coefficient map"), lib, comm, readBinary); + // overlapping map for the cluster coefficients + ghosted_clusterCoeffMap = 
IO::ReadMap(hierarchicalParams.get("ghosted coefficient map"), lib, comm, readBinary); + // 1-to-1 map for the clusters + clusterMap = IO::ReadMap(hierarchicalParams.get("cluster map"), lib, comm, readBinary); + // overlapping map for the clusters + ghosted_clusterMap = IO::ReadMap(hierarchicalParams.get("ghosted cluster map"), lib, comm, readBinary); + + // blocked cluster map + clusterSizes = Xpetra::IO::ReadMultiVector(hierarchicalParams.get("gid_cluster_to_gid_coeff"), clusterMap)->getVectorNonConst(0); + blockedClusterMap = rcp(new blocked_map_type(clusterCoeffMap, clusterSizes)); } - static - Teuchos::RCP > - Read(std::string& filename, - RCP< const Teuchos::Comm >& comm) { - Teuchos::ParameterList hierarchicalParams; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(&hierarchicalParams), *comm); - auto op = Read(hierarchicalParams, comm); - return op; - } + // near field interactions + nearField = Read(hierarchicalParams.get("near field matrix"), map, near_colmap, map, map, true, readBinary, readLocal); - static - Teuchos::RCP > - Read(Teuchos::ParameterList& hierarchicalParams, - RCP< const Teuchos::Comm >& comm) { - using HOp = Xpetra::HierarchicalOperator; - using blocked_matrix_type = typename HOp::blocked_matrix_type; - using blocked_map_type = typename blocked_matrix_type::blocked_map_type; - using matrix_type = typename HOp::matrix_type; - using map_type = typename HOp::map_type; - using lo_vec_type = typename blocked_map_type::lo_vec_type; - - auto lib = Xpetra::UseTpetra; - RCP op; - RCP map, near_colmap, clusterCoeffMap, ghosted_clusterCoeffMap, clusterMap, ghosted_clusterMap; - RCP nearField, basisMatrix, kernelApproximations, kernelBlockGraph; - - std::vector > transferMatrices; - RCP clusterSizes; - RCP blockedClusterMap, ghosted_blockedClusterMap; - RCP blockKernelApproximations; - - const bool readBinary = hierarchicalParams.get("read binary", false); - const bool readLocal = hierarchicalParams.get("read local", false); 
- - using IO = Xpetra::IO; - - // row, domain and range map of the operator - map = IO::ReadMap(hierarchicalParams.get("map"), lib, comm, readBinary); - // colmap of near field - near_colmap = IO::ReadMap(hierarchicalParams.get("near colmap"), lib, comm, readBinary); - if (hierarchicalParams.isType("coefficient map")) { - // 1-to-1 map for the cluster coefficients - clusterCoeffMap = IO::ReadMap(hierarchicalParams.get("coefficient map"), lib, comm, readBinary); - // overlapping map for the cluster coefficients - ghosted_clusterCoeffMap = IO::ReadMap(hierarchicalParams.get("ghosted coefficient map"), lib, comm, readBinary); - // 1-to-1 map for the clusters - clusterMap = IO::ReadMap(hierarchicalParams.get("cluster map"), lib, comm, readBinary); - // overlapping map for the clusters - ghosted_clusterMap = IO::ReadMap(hierarchicalParams.get("ghosted cluster map"), lib, comm, readBinary); - - // blocked cluster map - clusterSizes = Xpetra::IO::ReadMultiVector(hierarchicalParams.get("gid_cluster_to_gid_coeff"), clusterMap)->getVectorNonConst(0); - blockedClusterMap = rcp(new blocked_map_type(clusterCoeffMap, clusterSizes)); - } + if (hierarchicalParams.isType("coefficient map")) { + // far field basis expansion coefficients + basisMatrix = IOhelpers::Read(hierarchicalParams.get("basis expansion coefficient matrix"), map, clusterCoeffMap, clusterCoeffMap, map, true, readBinary, readLocal); - // near field interactions - nearField = Read(hierarchicalParams.get("near field matrix"), map, near_colmap, map, map, true, readBinary, readLocal); - - if (hierarchicalParams.isType("coefficient map")) { - // far field basis expansion coefficients - basisMatrix = IOhelpers::Read(hierarchicalParams.get("basis expansion coefficient matrix"), map, clusterCoeffMap, clusterCoeffMap, map, true, readBinary, readLocal); - - // far field interactions - kernelApproximations = IOhelpers::Read(hierarchicalParams.get("far field interaction matrix"), clusterCoeffMap, ghosted_clusterCoeffMap, 
clusterCoeffMap, clusterCoeffMap, true, readBinary, readLocal); - // block graph of far field interactions - kernelBlockGraph = IOhelpers::Read(hierarchicalParams.get("far field interaction matrix")+".block", clusterMap, ghosted_clusterMap, clusterMap, clusterMap, true, readBinary, readLocal); - - { - auto import = kernelBlockGraph->getCrsGraph()->getImporter(); - RCP ghosted_clusterSizes = Xpetra::VectorFactory::Build(ghosted_clusterMap); - ghosted_clusterSizes->doImport(*clusterSizes, *import, Xpetra::INSERT); - ghosted_blockedClusterMap = rcp(new blocked_map_type(ghosted_clusterCoeffMap, ghosted_clusterSizes)); - } - - blockKernelApproximations = rcp(new blocked_matrix_type(kernelApproximations, kernelBlockGraph, blockedClusterMap, ghosted_blockedClusterMap)); - - // Transfer matrices - auto transfersList = hierarchicalParams.sublist("shift coefficient matrices"); - for (int i = 0; i < transfersList.numParams(); i++) { - std::string filename = transfersList.get(std::to_string(i)); - auto transferPoint = IOhelpers::Read(filename, clusterCoeffMap, clusterCoeffMap, clusterCoeffMap, clusterCoeffMap, true, readBinary, readLocal); - auto transferBlock = IOhelpers::Read(filename+".block", clusterMap, clusterMap, clusterMap, clusterMap, true, readBinary, readLocal); - auto transfer = rcp(new blocked_matrix_type(transferPoint, transferBlock, blockedClusterMap)); - transferMatrices.push_back(transfer); - } - } + // far field interactions + kernelApproximations = IOhelpers::Read(hierarchicalParams.get("far field interaction matrix"), clusterCoeffMap, ghosted_clusterCoeffMap, clusterCoeffMap, clusterCoeffMap, true, readBinary, readLocal); + // block graph of far field interactions + kernelBlockGraph = IOhelpers::Read(hierarchicalParams.get("far field interaction matrix") + ".block", clusterMap, ghosted_clusterMap, clusterMap, clusterMap, true, readBinary, readLocal); - RCP params; - if (hierarchicalParams.isSublist("params")) { - params = rcp(new 
Teuchos::ParameterList(hierarchicalParams.sublist("params"))); + { + auto import = kernelBlockGraph->getCrsGraph()->getImporter(); + RCP ghosted_clusterSizes = Xpetra::VectorFactory::Build(ghosted_clusterMap); + ghosted_clusterSizes->doImport(*clusterSizes, *import, Xpetra::INSERT); + ghosted_blockedClusterMap = rcp(new blocked_map_type(ghosted_clusterCoeffMap, ghosted_clusterSizes)); } - if (hierarchicalParams.isType("coefficient map")) { - op = rcp(new HOp(nearField, blockKernelApproximations, basisMatrix, transferMatrices, params)); + blockKernelApproximations = rcp(new blocked_matrix_type(kernelApproximations, kernelBlockGraph, blockedClusterMap, ghosted_blockedClusterMap)); - return op; - } else - return nearField; + // Transfer matrices + auto transfersList = hierarchicalParams.sublist("shift coefficient matrices"); + for (int i = 0; i < transfersList.numParams(); i++) { + std::string filename = transfersList.get(std::to_string(i)); + auto transferPoint = IOhelpers::Read(filename, clusterCoeffMap, clusterCoeffMap, clusterCoeffMap, clusterCoeffMap, true, readBinary, readLocal); + auto transferBlock = IOhelpers::Read(filename + ".block", clusterMap, clusterMap, clusterMap, clusterMap, true, readBinary, readLocal); + auto transfer = rcp(new blocked_matrix_type(transferPoint, transferBlock, blockedClusterMap)); + transferMatrices.push_back(transfer); + } + } + + RCP params; + if (hierarchicalParams.isSublist("params")) { + params = rcp(new Teuchos::ParameterList(hierarchicalParams.sublist("params"))); } - }; + if (hierarchicalParams.isType("coefficient map")) { + op = rcp(new HOp(nearField, blockKernelApproximations, basisMatrix, transferMatrices, params)); + + return op; + } else + return nearField; + } +}; -} +} // namespace MueLu #endif diff --git a/packages/muelu/research/caglusa/Tpetra_BlockedMap_decl.hpp b/packages/muelu/research/caglusa/Tpetra_BlockedMap_decl.hpp index 497294a2e77e..a04d34127bd2 100644 --- 
a/packages/muelu/research/caglusa/Tpetra_BlockedMap_decl.hpp +++ b/packages/muelu/research/caglusa/Tpetra_BlockedMap_decl.hpp @@ -1,37 +1,33 @@ #ifndef TPETRA_BLOCKEDMAP_DECL_HPP #define TPETRA_BLOCKEDMAP_DECL_HPP - #include #include #include #include - namespace Tpetra { - template - class BlockedMap { - - public: - using map_type = Tpetra::Map; - using lo_vec_type = Tpetra::Vector; - - BlockedMap(const Teuchos::RCP& pointMap, - const Teuchos::RCP& blockSizes); - - // private: - Teuchos::RCP pointMap_; - Teuchos::RCP blockMap_; - Teuchos::RCP blockSizes_; - Kokkos::View offsets_; - LocalOrdinal minClusterSize_; - LocalOrdinal maxClusterSize_; - - }; - -} - -#endif // TPETRA_BLOCKEDMAP_DECL_HPP +template +class BlockedMap { + public: + using map_type = Tpetra::Map; + using lo_vec_type = Tpetra::Vector; + + BlockedMap(const Teuchos::RCP& pointMap, + const Teuchos::RCP& blockSizes); + + // private: + Teuchos::RCP pointMap_; + Teuchos::RCP blockMap_; + Teuchos::RCP blockSizes_; + Kokkos::View offsets_; + LocalOrdinal minClusterSize_; + LocalOrdinal maxClusterSize_; +}; + +} // namespace Tpetra + +#endif // TPETRA_BLOCKEDMAP_DECL_HPP diff --git a/packages/muelu/research/caglusa/Tpetra_BlockedMap_def.hpp b/packages/muelu/research/caglusa/Tpetra_BlockedMap_def.hpp index 49e63cc99023..e654a557db41 100644 --- a/packages/muelu/research/caglusa/Tpetra_BlockedMap_def.hpp +++ b/packages/muelu/research/caglusa/Tpetra_BlockedMap_def.hpp @@ -3,37 +3,34 @@ #include - namespace Tpetra { - template - BlockedMap:: - BlockedMap(const Teuchos::RCP& pointMap, - const Teuchos::RCP& blockSizes) - : - pointMap_(pointMap), - blockMap_(blockSizes->getMap()), - blockSizes_(blockSizes) - { - auto lclBlockSizes = blockSizes_->getLocalViewHost(Tpetra::Access::ReadOnly); - LocalOrdinal minClusterSize = Teuchos::OrdinalTraits::max(); - LocalOrdinal maxClusterSize = 0; - offsets_ = Kokkos::View("offsets", blockMap_->getLocalNumElements()+1); - auto offsets_h = Kokkos::create_mirror_view(offsets_); 
- offsets_h(0) = 0; - for (size_t blockNum = 0; blockNum < blockMap_->getLocalNumElements(); ++blockNum) { - offsets_h(blockNum+1) = offsets_h(blockNum) + lclBlockSizes(blockNum, 0); - minClusterSize = std::min(minClusterSize, lclBlockSizes(blockNum, 0)); - maxClusterSize = std::max(maxClusterSize, lclBlockSizes(blockNum, 0)); - } - Kokkos::deep_copy(offsets_, offsets_h); - TEUCHOS_ASSERT_EQUALITY(offsets_h(blockMap_->getLocalNumElements()), pointMap->getLocalNumElements()); - minClusterSize_ = minClusterSize; - maxClusterSize_ = maxClusterSize; +template +BlockedMap:: + BlockedMap(const Teuchos::RCP& pointMap, + const Teuchos::RCP& blockSizes) + : pointMap_(pointMap) + , blockMap_(blockSizes->getMap()) + , blockSizes_(blockSizes) { + auto lclBlockSizes = blockSizes_->getLocalViewHost(Tpetra::Access::ReadOnly); + LocalOrdinal minClusterSize = Teuchos::OrdinalTraits::max(); + LocalOrdinal maxClusterSize = 0; + offsets_ = Kokkos::View("offsets", blockMap_->getLocalNumElements() + 1); + auto offsets_h = Kokkos::create_mirror_view(offsets_); + offsets_h(0) = 0; + for (size_t blockNum = 0; blockNum < blockMap_->getLocalNumElements(); ++blockNum) { + offsets_h(blockNum + 1) = offsets_h(blockNum) + lclBlockSizes(blockNum, 0); + minClusterSize = std::min(minClusterSize, lclBlockSizes(blockNum, 0)); + maxClusterSize = std::max(maxClusterSize, lclBlockSizes(blockNum, 0)); } - + Kokkos::deep_copy(offsets_, offsets_h); + TEUCHOS_ASSERT_EQUALITY(offsets_h(blockMap_->getLocalNumElements()), pointMap->getLocalNumElements()); + minClusterSize_ = minClusterSize; + maxClusterSize_ = maxClusterSize; } -#endif // TPETRA_BLOCKEDMAP_DEF_HPP +} // namespace Tpetra + +#endif // TPETRA_BLOCKEDMAP_DEF_HPP diff --git a/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_decl.hpp b/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_decl.hpp index 8a4554d7ea5e..5242f96853fa 100644 --- a/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_decl.hpp +++ 
b/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_decl.hpp @@ -6,45 +6,42 @@ #include #include - namespace Tpetra { - template ::scalar_type, - class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, - class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, - class Node = typename Tpetra::Operator::node_type> - class BlockedMatrix { - - public: - using matrix_type = Tpetra::CrsMatrix; - using blocked_map_type = BlockedMap; - using lo_vec_type = typename blocked_map_type::lo_vec_type; - - BlockedMatrix(const Teuchos::RCP& pointA, - const Teuchos::RCP& blockA, - const Teuchos::RCP& blockMap, - const Teuchos::RCP& ghosted_blockMap=Teuchos::null); - - void apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const; - - void localApply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const; +template ::scalar_type, + class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, + class Node = typename Tpetra::Operator::node_type> +class BlockedMatrix { + public: + using matrix_type = Tpetra::CrsMatrix; + using blocked_map_type = BlockedMap; + using lo_vec_type = typename blocked_map_type::lo_vec_type; + + BlockedMatrix(const Teuchos::RCP& pointA, + const Teuchos::RCP& blockA, + const Teuchos::RCP& blockMap, + const Teuchos::RCP& ghosted_blockMap = Teuchos::null); + + void apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const; + + void localApply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode 
= Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const; // private: - Teuchos::RCP pointA_; - Teuchos::RCP blockA_; - Teuchos::RCP blockMap_; - Teuchos::RCP ghosted_blockMap_; - - }; + Teuchos::RCP pointA_; + Teuchos::RCP blockA_; + Teuchos::RCP blockMap_; + Teuchos::RCP ghosted_blockMap_; +}; -} +} // namespace Tpetra -#endif // TPETRA_BLOCKEDMATRIX_DECL_HPP +#endif // TPETRA_BLOCKEDMATRIX_DECL_HPP diff --git a/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp b/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp index 398a2fdff680..392df660e046 100644 --- a/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp +++ b/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp @@ -1,61 +1,55 @@ #ifndef TPETRA_BLOCKEDMATRIX_DEF_HPP #define TPETRA_BLOCKEDMATRIX_DEF_HPP - namespace Tpetra { - template - BlockedMatrix:: - BlockedMatrix(const Teuchos::RCP& pointA, - const Teuchos::RCP& blockA, - const Teuchos::RCP& blockMap, - const Teuchos::RCP& ghosted_blockMap) - : - pointA_(pointA), - blockA_(blockA), - blockMap_(blockMap), - ghosted_blockMap_(ghosted_blockMap) - { - TEUCHOS_ASSERT(blockA_->getDomainMap()->isSameAs(*blockA_->getRangeMap())); - TEUCHOS_ASSERT(blockA_->getDomainMap()->isSameAs(*blockA_->getRowMap())); - TEUCHOS_ASSERT(blockA_->getDomainMap()->isSameAs(*blockMap_->blockMap_)); - - TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*pointA_->getRangeMap())); - TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*pointA_->getRowMap())); - TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*blockMap_->pointMap_)); - - } +template +BlockedMatrix:: + BlockedMatrix(const Teuchos::RCP& pointA, + const Teuchos::RCP& blockA, + const Teuchos::RCP& blockMap, + const Teuchos::RCP& ghosted_blockMap) + : pointA_(pointA) + , blockA_(blockA) + , blockMap_(blockMap) + , ghosted_blockMap_(ghosted_blockMap) { + TEUCHOS_ASSERT(blockA_->getDomainMap()->isSameAs(*blockA_->getRangeMap())); 
+ TEUCHOS_ASSERT(blockA_->getDomainMap()->isSameAs(*blockA_->getRowMap())); + TEUCHOS_ASSERT(blockA_->getDomainMap()->isSameAs(*blockMap_->blockMap_)); + + TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*pointA_->getRangeMap())); + TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*pointA_->getRowMap())); + TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*blockMap_->pointMap_)); +} - template - void - BlockedMatrix:: - apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, - Scalar alpha, - Scalar beta) const { - pointA_->apply(X, Y, mode, alpha, beta); - } +template +void BlockedMatrix:: + apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, + Scalar alpha, + Scalar beta) const { + pointA_->apply(X, Y, mode, alpha, beta); +} - template - void - BlockedMatrix:: - localApply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, - Scalar alpha, - Scalar beta) const { - pointA_->localApply(X, Y, mode, alpha, beta); - } +template +void BlockedMatrix:: + localApply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, + Scalar alpha, + Scalar beta) const { + pointA_->localApply(X, Y, mode, alpha, beta); } +} // namespace Tpetra -#endif // TPETRA_BLOCKEDMATRIX_DEF_HPP +#endif // TPETRA_BLOCKEDMATRIX_DEF_HPP diff --git a/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_decl.hpp b/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_decl.hpp index c96c286a439e..c583d7e05127 100644 --- a/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_decl.hpp +++ b/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_decl.hpp @@ -9,368 +9,364 @@ #include #include - namespace Tpetra { - /* - - A container class for hierarchical matrices of different types. - In particular, both H- and H2-matrices are supported. - - The unknowns of the kernel approximations are collected in the clusterMap. 
- For H-matrices, this is just a concatenation. - For H2-matrices, the map also contains the intermediate clusters that might be needed in upward/downward pass. - - - H = nearField - + basisMatrix * - ((I+transferMatrices[K-1]^T) * ... * (I+transferMatrices[0]^T)) * - kernelApproximations * - ((I+transferMatrices[0]) * ... * (I+transferMatrices[K-1])) * - basisMatrix^T - - nearField and basisMatrix are standard (point) CRS matrices. - kernelApproximations and transferMatrices[.] are blocked CRS matrices - - I is the identity matrix and is not explicitely saved. - - Maps: - map (standard): domain and range of H; - domain, range, row of nearField - clusterMap (blocked map): domain, range, row, column of transferMatrices; - domain, range, row of kernelApproximations - ghosted_clusterMap (blocked map): column of kernelApproximations - - - For H-matrices: - K = 0, i.e. there are no transfer matrices - - For H2-matrices: - upward and downward pass in the cluster hierarchy are encoded in transfer matrices - */ - - template ::scalar_type, - class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, - class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, - class Node = typename Tpetra::Operator::node_type> - class HierarchicalOperator : public Tpetra::RowMatrix { - - public: - using matrix_type = Tpetra::CrsMatrix; - using mv_type = Tpetra::MultiVector; - using map_type = Tpetra::Map; - - //! 
The RowMatrix representing the base class of CrsMatrix - using row_matrix_type = RowMatrix; - - using impl_scalar_type = typename row_matrix_type::impl_scalar_type; - using mag_type = typename Kokkos::ArithTraits::mag_type; - - using local_inds_device_view_type = - typename row_matrix_type::local_inds_device_view_type; - using local_inds_host_view_type = - typename row_matrix_type::local_inds_host_view_type; - using nonconst_local_inds_host_view_type = - typename row_matrix_type::nonconst_local_inds_host_view_type; - - using global_inds_device_view_type = - typename row_matrix_type::global_inds_device_view_type; - using global_inds_host_view_type = - typename row_matrix_type::global_inds_host_view_type; - using nonconst_global_inds_host_view_type = - typename row_matrix_type::nonconst_global_inds_host_view_type; - - using values_device_view_type = - typename row_matrix_type::values_device_view_type; - using values_host_view_type = - typename row_matrix_type::values_host_view_type; - using nonconst_values_host_view_type = - typename row_matrix_type::nonconst_values_host_view_type; - - using blocked_matrix_type = BlockedMatrix; - using blocked_map_type = BlockedMap; - - //! @name Constructor/Destructor - //@{ - - //! Constructor - HierarchicalOperator(const Teuchos::RCP& nearField, - const Teuchos::RCP& kernelApproximations, - const Teuchos::RCP& basisMatrix, - std::vector >& transferMatrices, - const Teuchos::RCP& params=Teuchos::null); - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP > getDomainMap() const { - return nearField_->getDomainMap(); - } - - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP > getRangeMap() const { - return nearField_->getRangeMap(); - } - - //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. - /*! - \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. 
- \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. - */ - void apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const; - - - Teuchos::RCP > restrict(const Teuchos::RCP& P); - - Teuchos::RCP toMatrix(); - - double getCompression() { - size_t nnz = (nearField_->getGlobalNumEntries() + - kernelApproximations_->pointA_->getGlobalNumEntries() + - basisMatrix_->getGlobalNumEntries()); - for (size_t i = 0; i < transferMatrices_.size(); i++) - nnz += transferMatrices_[i]->pointA_->getGlobalNumEntries(); - return Teuchos::as(nnz) / (getDomainMap()->getGlobalNumElements()*getDomainMap()->getGlobalNumElements()); - } - - Teuchos::RCP nearFieldMatrix() { - return nearField_; - } - - // Fake RowMatrix interface - Teuchos::RCP > getRowMap() const { - return nearField_->getRowMap(); - } - - Teuchos::RCP > getColMap() const { - return nearField_->getColMap(); - } - - Teuchos::RCP > getComm() const { - return nearField_->getDomainMap()->getComm(); - } - - Teuchos::RCP > getGraph() const { - return nearField_->getCrsGraph(); - } - - global_size_t getGlobalNumRows() const { - return nearField_->getGlobalNumRows(); - } - - global_size_t getGlobalNumCols() const { - return nearField_->getGlobalNumCols(); - } - - size_t getLocalNumRows() const { - return nearField_->getLocalNumRows(); - } - - size_t getLocalNumCols() const { - return nearField_->getLocalNumCols(); - } - - GlobalOrdinal getIndexBase() const { - return nearField_->getIndexBase(); - } - - global_size_t getGlobalNumEntries() const { - return nearField_->getGlobalNumEntries(); - } - - size_t getLocalNumEntries() const { - return nearField_->getLocalNumEntries(); - } - - size_t getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const { - throw std::runtime_error("Not implemented."); - } - - size_t getNumEntriesInLocalRow (LocalOrdinal localRow) 
const { - throw std::runtime_error("Not implemented."); - } - - size_t getGlobalMaxNumRowEntries () const { - throw std::runtime_error("Not implemented."); - } - - LocalOrdinal getBlockSize () const { - throw std::runtime_error("Not implemented."); - } - - size_t getLocalMaxNumRowEntries () const { - throw std::runtime_error("Not implemented."); - } - - bool hasColMap () const { - return false; - } - - bool isLocallyIndexed() const { - return true; - } - - bool isGloballyIndexed() const { - return true; - } - - bool isFillComplete() const { - return true; - } - - bool supportsRowViews() const { - return false; - } - - void - getGlobalRowCopy (GlobalOrdinal GlobalRow, - nonconst_global_inds_host_view_type &Indices, - nonconst_values_host_view_type &Values, - size_t& NumEntries) const { - throw std::runtime_error("Not implemented."); - } - - void - getLocalRowCopy (LocalOrdinal LocalRow, - nonconst_local_inds_host_view_type &Indices, - nonconst_values_host_view_type &Values, - size_t& NumEntries) const { - throw std::runtime_error("Not implemented."); - } - - void - getGlobalRowView (GlobalOrdinal GlobalRow, - global_inds_host_view_type &indices, - values_host_view_type &values) const { - throw std::runtime_error("Not implemented."); - } - - void - getLocalRowView (LocalOrdinal LocalRow, - local_inds_host_view_type & indices, - values_host_view_type & values) const { - throw std::runtime_error("Not implemented."); - } - - void getLocalDiagCopy (Vector &diag) const { - nearField_->getLocalDiagCopy(diag); - } - - void leftScale (const Vector& x) { - throw std::runtime_error("Not implemented."); - } - - void rightScale (const Vector& x) { - throw std::runtime_error("Not implemented."); - } - - mag_type getFrobeniusNorm() const { - return 0.; - } - - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { - describe(out, verbLevel, true); - } - - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel, const 
bool printHeader) const { - using std::setw; - using std::endl; - const size_t numRows = nearField_->getRowMap()->getGlobalNumElements(); - const size_t nnzNearField = nearField_->getGlobalNumEntries(); - const double nnzNearPerRow = Teuchos::as(nnzNearField)/numRows; - const size_t nnzKernelApprox = kernelApproximations_->pointA_->getGlobalNumEntries(); - const size_t numClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); - const size_t nnzBasis = basisMatrix_->getGlobalNumEntries(); - size_t numTransfers = transferMatrices_.size(); - size_t nnzTransfer = 0; - for (size_t i = 0; ipointA_->getGlobalNumEntries(); - const size_t nnzTotal = nnzNearField+nnzKernelApprox+nnzBasis+nnzTransfer; - const double nnzTotalPerRow = Teuchos::as(nnzTotal)/numRows; - std::ostringstream oss; - oss << std::left; - if (printHeader) - oss << setw(9) << "rows" << setw(12) - << "nnz(near)" << setw(14) - << "nnz(near)/row" << setw(12) - << "nnz(basis)" << setw(15) - << "#cluster pairs" << setw(12) - << "nnz(kernel)" << setw(14) - << "#transfers" << setw(14) - << "nnz(transfer)" << setw(12) - << "nnz(total)" << setw(14) - << "nnz(total)/row" << endl; - oss << setw(9) << numRows << setw(12) - << nnzNearField << setw(14) - << nnzNearPerRow << setw(12) - << nnzBasis << setw(15) - << numClusterPairs << setw(12) - << nnzKernelApprox << setw(14) - << numTransfers << setw(14) - << nnzTransfer << setw(12) - << nnzTotal << setw(14) - << nnzTotalPerRow << endl; - out << oss.str(); - } - - bool hasFarField() const { - return kernelApproximations_->blockA_->getGlobalNumEntries() > 0; - } - - bool hasTransferMatrices() const { - return transferMatrices_.size() > 0; - } - - bool denserThanDenseMatrix() const { - const size_t numRows = nearField_->getRowMap()->getGlobalNumElements(); - const size_t nnzNearField = nearField_->getGlobalNumEntries(); - // const double nnzNearPerRow = Teuchos::as(nnzNearField)/numRows; - const size_t nnzKernelApprox = 
kernelApproximations_->pointA_->getGlobalNumEntries(); - // const size_t numClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); - const size_t nnzBasis = basisMatrix_->getGlobalNumEntries(); - size_t nnzTransfer = 0; - for (size_t i = 0; ipointA_->getGlobalNumEntries(); - const size_t nnzTotal = nnzNearField+nnzKernelApprox+nnzBasis+nnzTransfer; - const double nnzTotalPerRow = Teuchos::as(nnzTotal)/numRows; - - return (nnzTotalPerRow >= numRows); - } - - private: - - void allocateMemory(size_t numVectors) const; - - void applyWithTransposes(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const; - - void applyWithoutTransposes(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const; - - bool canApplyWithoutTransposes_; - std::string coarseningCriterion_; - bool debugOutput_; - - Teuchos::RCP nearField_; - Teuchos::RCP kernelApproximations_; - Teuchos::RCP basisMatrix_; - Teuchos::RCP basisMatrixT_; - std::vector > transferMatrices_; - std::vector > transferMatricesT_; - Teuchos::RCP clusterCoeffMap_; - mutable Teuchos::RCP coefficients_, coefficients2_; - mutable Teuchos::RCP X_colmap_, coefficients_colmap_; - - Teuchos::RCP params_; - }; -} - -#endif // TPETRA_HIERARCHICALOPERATOR_DECL_HPP +/* + +A container class for hierarchical matrices of different types. +In particular, both H- and H2-matrices are supported. + +The unknowns of the kernel approximations are collected in the clusterMap. +For H-matrices, this is just a concatenation. +For H2-matrices, the map also contains the intermediate clusters that might be needed in upward/downward pass. + + +H = nearField + + basisMatrix * + ((I+transferMatrices[K-1]^T) * ... 
* (I+transferMatrices[0]^T)) * + kernelApproximations * + ((I+transferMatrices[0]) * ... * (I+transferMatrices[K-1])) * + basisMatrix^T + +nearField and basisMatrix are standard (point) CRS matrices. +kernelApproximations and transferMatrices[.] are blocked CRS matrices + +I is the identity matrix and is not explicitly saved. + +Maps: +map (standard): domain and range of H; + domain, range, row of nearField +clusterMap (blocked map): domain, range, row, column of transferMatrices; + domain, range, row of kernelApproximations +ghosted_clusterMap (blocked map): column of kernelApproximations + + +For H-matrices: +K = 0, i.e. there are no transfer matrices + +For H2-matrices: +upward and downward pass in the cluster hierarchy are encoded in transfer matrices +*/ + +template ::scalar_type, + class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, + class Node = typename Tpetra::Operator::node_type> +class HierarchicalOperator : public Tpetra::RowMatrix { + public: + using matrix_type = Tpetra::CrsMatrix; + using mv_type = Tpetra::MultiVector; + using map_type = Tpetra::Map; + + //! 
The RowMatrix representing the base class of CrsMatrix + using row_matrix_type = RowMatrix; + + using impl_scalar_type = typename row_matrix_type::impl_scalar_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + + using local_inds_device_view_type = + typename row_matrix_type::local_inds_device_view_type; + using local_inds_host_view_type = + typename row_matrix_type::local_inds_host_view_type; + using nonconst_local_inds_host_view_type = + typename row_matrix_type::nonconst_local_inds_host_view_type; + + using global_inds_device_view_type = + typename row_matrix_type::global_inds_device_view_type; + using global_inds_host_view_type = + typename row_matrix_type::global_inds_host_view_type; + using nonconst_global_inds_host_view_type = + typename row_matrix_type::nonconst_global_inds_host_view_type; + + using values_device_view_type = + typename row_matrix_type::values_device_view_type; + using values_host_view_type = + typename row_matrix_type::values_host_view_type; + using nonconst_values_host_view_type = + typename row_matrix_type::nonconst_values_host_view_type; + + using blocked_matrix_type = BlockedMatrix; + using blocked_map_type = BlockedMap; + + //! @name Constructor/Destructor + //@{ + + //! Constructor + HierarchicalOperator(const Teuchos::RCP& nearField, + const Teuchos::RCP& kernelApproximations, + const Teuchos::RCP& basisMatrix, + std::vector >& transferMatrices, + const Teuchos::RCP& params = Teuchos::null); + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP > getDomainMap() const { + return nearField_->getDomainMap(); + } + + //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP > getRangeMap() const { + return nearField_->getRangeMap(); + } + + //! Returns in Y the result of a Tpetra::Operator applied to a Tpetra::MultiVector X. + /*! + \param[in] X - Tpetra::MultiVector of dimension NumVectors to multiply with matrix. 
+ \param[out] Y -Tpetra::MultiVector of dimension NumVectors containing result. + */ + void apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const; + + Teuchos::RCP > restrict(const Teuchos::RCP& P); + + Teuchos::RCP toMatrix(); + + double getCompression() { + size_t nnz = (nearField_->getGlobalNumEntries() + + kernelApproximations_->pointA_->getGlobalNumEntries() + + basisMatrix_->getGlobalNumEntries()); + for (size_t i = 0; i < transferMatrices_.size(); i++) + nnz += transferMatrices_[i]->pointA_->getGlobalNumEntries(); + return Teuchos::as(nnz) / (getDomainMap()->getGlobalNumElements() * getDomainMap()->getGlobalNumElements()); + } + + Teuchos::RCP nearFieldMatrix() { + return nearField_; + } + + // Fake RowMatrix interface + Teuchos::RCP > getRowMap() const { + return nearField_->getRowMap(); + } + + Teuchos::RCP > getColMap() const { + return nearField_->getColMap(); + } + + Teuchos::RCP > getComm() const { + return nearField_->getDomainMap()->getComm(); + } + + Teuchos::RCP > getGraph() const { + return nearField_->getCrsGraph(); + } + + global_size_t getGlobalNumRows() const { + return nearField_->getGlobalNumRows(); + } + + global_size_t getGlobalNumCols() const { + return nearField_->getGlobalNumCols(); + } + + size_t getLocalNumRows() const { + return nearField_->getLocalNumRows(); + } + + size_t getLocalNumCols() const { + return nearField_->getLocalNumCols(); + } + + GlobalOrdinal getIndexBase() const { + return nearField_->getIndexBase(); + } + + global_size_t getGlobalNumEntries() const { + return nearField_->getGlobalNumEntries(); + } + + size_t getLocalNumEntries() const { + return nearField_->getLocalNumEntries(); + } + + size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const { + throw std::runtime_error("Not implemented."); + } + + size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const 
{ + throw std::runtime_error("Not implemented."); + } + + size_t getGlobalMaxNumRowEntries() const { + throw std::runtime_error("Not implemented."); + } + + LocalOrdinal getBlockSize() const { + throw std::runtime_error("Not implemented."); + } + + size_t getLocalMaxNumRowEntries() const { + throw std::runtime_error("Not implemented."); + } + + bool hasColMap() const { + return false; + } + + bool isLocallyIndexed() const { + return true; + } + + bool isGloballyIndexed() const { + return true; + } + + bool isFillComplete() const { + return true; + } + + bool supportsRowViews() const { + return false; + } + + void + getGlobalRowCopy(GlobalOrdinal GlobalRow, + nonconst_global_inds_host_view_type& Indices, + nonconst_values_host_view_type& Values, + size_t& NumEntries) const { + throw std::runtime_error("Not implemented."); + } + + void + getLocalRowCopy(LocalOrdinal LocalRow, + nonconst_local_inds_host_view_type& Indices, + nonconst_values_host_view_type& Values, + size_t& NumEntries) const { + throw std::runtime_error("Not implemented."); + } + + void + getGlobalRowView(GlobalOrdinal GlobalRow, + global_inds_host_view_type& indices, + values_host_view_type& values) const { + throw std::runtime_error("Not implemented."); + } + + void + getLocalRowView(LocalOrdinal LocalRow, + local_inds_host_view_type& indices, + values_host_view_type& values) const { + throw std::runtime_error("Not implemented."); + } + + void getLocalDiagCopy(Vector& diag) const { + nearField_->getLocalDiagCopy(diag); + } + + void leftScale(const Vector& x) { + throw std::runtime_error("Not implemented."); + } + + void rightScale(const Vector& x) { + throw std::runtime_error("Not implemented."); + } + + mag_type getFrobeniusNorm() const { + return 0.; + } + + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { + describe(out, verbLevel, true); + } + + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel, const bool printHeader) 
const { + using std::endl; + using std::setw; + const size_t numRows = nearField_->getRowMap()->getGlobalNumElements(); + const size_t nnzNearField = nearField_->getGlobalNumEntries(); + const double nnzNearPerRow = Teuchos::as(nnzNearField) / numRows; + const size_t nnzKernelApprox = kernelApproximations_->pointA_->getGlobalNumEntries(); + const size_t numClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); + const size_t nnzBasis = basisMatrix_->getGlobalNumEntries(); + size_t numTransfers = transferMatrices_.size(); + size_t nnzTransfer = 0; + for (size_t i = 0; i < transferMatrices_.size(); i++) + nnzTransfer += transferMatrices_[i]->pointA_->getGlobalNumEntries(); + const size_t nnzTotal = nnzNearField + nnzKernelApprox + nnzBasis + nnzTransfer; + const double nnzTotalPerRow = Teuchos::as(nnzTotal) / numRows; + std::ostringstream oss; + oss << std::left; + if (printHeader) + oss << setw(9) << "rows" << setw(12) + << "nnz(near)" << setw(14) + << "nnz(near)/row" << setw(12) + << "nnz(basis)" << setw(15) + << "#cluster pairs" << setw(12) + << "nnz(kernel)" << setw(14) + << "#transfers" << setw(14) + << "nnz(transfer)" << setw(12) + << "nnz(total)" << setw(14) + << "nnz(total)/row" << endl; + oss << setw(9) << numRows << setw(12) + << nnzNearField << setw(14) + << nnzNearPerRow << setw(12) + << nnzBasis << setw(15) + << numClusterPairs << setw(12) + << nnzKernelApprox << setw(14) + << numTransfers << setw(14) + << nnzTransfer << setw(12) + << nnzTotal << setw(14) + << nnzTotalPerRow << endl; + out << oss.str(); + } + + bool hasFarField() const { + return kernelApproximations_->blockA_->getGlobalNumEntries() > 0; + } + + bool hasTransferMatrices() const { + return transferMatrices_.size() > 0; + } + + bool denserThanDenseMatrix() const { + const size_t numRows = nearField_->getRowMap()->getGlobalNumElements(); + const size_t nnzNearField = nearField_->getGlobalNumEntries(); + // const double nnzNearPerRow = Teuchos::as(nnzNearField)/numRows; + 
const size_t nnzKernelApprox = kernelApproximations_->pointA_->getGlobalNumEntries(); + // const size_t numClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); + const size_t nnzBasis = basisMatrix_->getGlobalNumEntries(); + size_t nnzTransfer = 0; + for (size_t i = 0; i < transferMatrices_.size(); i++) + nnzTransfer += transferMatrices_[i]->pointA_->getGlobalNumEntries(); + const size_t nnzTotal = nnzNearField + nnzKernelApprox + nnzBasis + nnzTransfer; + const double nnzTotalPerRow = Teuchos::as(nnzTotal) / numRows; + + return (nnzTotalPerRow >= numRows); + } + + private: + void allocateMemory(size_t numVectors) const; + + void applyWithTransposes(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const; + + void applyWithoutTransposes(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const; + + bool canApplyWithoutTransposes_; + std::string coarseningCriterion_; + bool debugOutput_; + + Teuchos::RCP nearField_; + Teuchos::RCP kernelApproximations_; + Teuchos::RCP basisMatrix_; + Teuchos::RCP basisMatrixT_; + std::vector > transferMatrices_; + std::vector > transferMatricesT_; + Teuchos::RCP clusterCoeffMap_; + mutable Teuchos::RCP coefficients_, coefficients2_; + mutable Teuchos::RCP X_colmap_, coefficients_colmap_; + + Teuchos::RCP params_; +}; +} // namespace Tpetra + +#endif // TPETRA_HIERARCHICALOPERATOR_DECL_HPP diff --git a/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp b/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp index c1abd28bdd81..67af4b36c4e6 100644 --- a/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp +++ b/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp @@ -4,951 
+4,936 @@ #include #include - namespace Tpetra { - template - Teuchos::RCP > - removeSmallEntries(Teuchos::RCP >& A, - typename Teuchos::ScalarTraits::magnitudeType tol) { - - using crs_matrix = Tpetra::CrsMatrix; - using row_ptr_type = typename crs_matrix::local_graph_device_type::row_map_type::non_const_type; - using col_idx_type = typename crs_matrix::local_graph_device_type::entries_type::non_const_type; - using vals_type = typename crs_matrix::local_matrix_device_type::values_type; - - typedef Kokkos::ArithTraits ATS; - using impl_SC = typename ATS::val_type; - using impl_ATS = Kokkos::ArithTraits; - - auto lclA = A->getLocalMatrixDevice(); - - auto rowptr = row_ptr_type("rowptr", lclA.numRows()+1); - - Kokkos::parallel_for("removeSmallEntries::rowptr1", - Kokkos::RangePolicy(0, lclA.numRows()), - KOKKOS_LAMBDA(const LocalOrdinal rlid) { - auto row = lclA.row(rlid); - for (LocalOrdinal k = 0; k tol) { - rowptr(rlid+1) += 1; - } - } - }); - LocalOrdinal nnz; - Kokkos::parallel_scan("removeSmallEntries::rowptr2", - Kokkos::RangePolicy(0, lclA.numRows()), - KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { - - partial_nnz += rowptr(rlid+1); - if (is_final) - rowptr(rlid+1) = partial_nnz; - - }, nnz); - - // auto nnz = rowptr(lclA.numRows()); - - auto idx = col_idx_type("idx", nnz); - auto vals = vals_type("vals", nnz); - - Kokkos::parallel_for("removeSmallEntries::indicesValues", - Kokkos::RangePolicy(0, lclA.numRows()), - KOKKOS_LAMBDA(const LocalOrdinal rlid) { - auto row = lclA.row(rlid); - auto I = rowptr(rlid); - for (LocalOrdinal k = 0; k tol) { - idx(I) = row.colidx(k); - vals(I) = row.value(k); - I += 1; - } - } - }); - - auto newA = Teuchos::rcp(new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx, vals)); - newA->fillComplete(A->getDomainMap(), - A->getRangeMap()); - return newA; - } +template +Teuchos::RCP > +removeSmallEntries(Teuchos::RCP >& A, + typename Teuchos::ScalarTraits::magnitudeType tol) { + using 
crs_matrix = Tpetra::CrsMatrix; + using row_ptr_type = typename crs_matrix::local_graph_device_type::row_map_type::non_const_type; + using col_idx_type = typename crs_matrix::local_graph_device_type::entries_type::non_const_type; + using vals_type = typename crs_matrix::local_matrix_device_type::values_type; + + typedef Kokkos::ArithTraits ATS; + using impl_SC = typename ATS::val_type; + using impl_ATS = Kokkos::ArithTraits; + + auto lclA = A->getLocalMatrixDevice(); + + auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1); + + Kokkos::parallel_for( + "removeSmallEntries::rowptr1", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid) { + auto row = lclA.row(rlid); + for (LocalOrdinal k = 0; k < row.length; ++k) { + if (impl_ATS::magnitude(row.value(k)) > tol) { + rowptr(rlid + 1) += 1; + } + } + }); + LocalOrdinal nnz; + Kokkos::parallel_scan( + "removeSmallEntries::rowptr2", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { + partial_nnz += rowptr(rlid + 1); + if (is_final) + rowptr(rlid + 1) = partial_nnz; + }, + nnz); + + // auto nnz = rowptr(lclA.numRows()); + + auto idx = col_idx_type("idx", nnz); + auto vals = vals_type("vals", nnz); + + Kokkos::parallel_for( + "removeSmallEntries::indicesValues", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid) { + auto row = lclA.row(rlid); + auto I = rowptr(rlid); + for (LocalOrdinal k = 0; k < row.length; ++k) { + if (impl_ATS::magnitude(row.value(k)) > tol) { + idx(I) = row.colidx(k); + vals(I) = row.value(k); + I += 1; + } + } + }); - template - Teuchos::RCP > - transpose(Teuchos::RCP >& A) { + auto newA = Teuchos::rcp(new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx, vals)); + newA->fillComplete(A->getDomainMap(), + A->getRangeMap()); + return newA; +} - TEUCHOS_ASSERT(A->ghosted_blockMap_.is_null()); +template +Teuchos::RCP > +transpose(Teuchos::RCP >& A) { + 
TEUCHOS_ASSERT(A->ghosted_blockMap_.is_null()); - Teuchos::RCP transposeParams = rcp(new Teuchos::ParameterList); + Teuchos::RCP transposeParams = rcp(new Teuchos::ParameterList); + + Tpetra::RowMatrixTransposer transposerPoint(A->pointA_); + auto pointAT = transposerPoint.createTranspose(transposeParams); - Tpetra::RowMatrixTransposer transposerPoint(A->pointA_); - auto pointAT = transposerPoint.createTranspose(transposeParams); + Tpetra::RowMatrixTransposer transposerBlock(A->blockA_); + auto blockAT = transposerBlock.createTranspose(transposeParams); - Tpetra::RowMatrixTransposer transposerBlock(A->blockA_); - auto blockAT = transposerBlock.createTranspose(transposeParams); + auto AT = Teuchos::rcp(new Tpetra::BlockedMatrix(pointAT, blockAT, A->blockMap_)); - auto AT = Teuchos::rcp(new Tpetra::BlockedMatrix(pointAT, blockAT, A->blockMap_)); + return AT; +} - return AT; +template +Teuchos::RCP > +buildIdentityMatrix(Teuchos::RCP >& map) { + using matrix_type = Tpetra::CrsMatrix; + Teuchos::RCP identity = Teuchos::rcp(new matrix_type(map, 1)); + Teuchos::ArrayView gblRows = map->getLocalElementList(); + for (auto it = gblRows.begin(); it != gblRows.end(); ++it) { + Teuchos::Array col(1, *it); + Teuchos::Array val(1, Teuchos::ScalarTraits::one()); + identity->insertGlobalValues(*it, col(), val()); } + identity->fillComplete(); + return identity; +} - template - Teuchos::RCP > - buildIdentityMatrix(Teuchos::RCP >& map) { - using matrix_type = Tpetra::CrsMatrix; - Teuchos::RCP identity = Teuchos::rcp(new matrix_type(map, 1)); - Teuchos::ArrayView gblRows = map->getLocalElementList (); - for (auto it = gblRows.begin (); it != gblRows.end (); ++it) { - Teuchos::Array col (1, *it); - Teuchos::Array val (1, Teuchos::ScalarTraits::one()); - identity->insertGlobalValues (*it, col (), val ()); +template +HierarchicalOperator:: + HierarchicalOperator(const Teuchos::RCP& nearField, + const Teuchos::RCP& kernelApproximations, + const Teuchos::RCP& basisMatrix, + std::vector >& 
transferMatrices, + const Teuchos::RCP& params) + : nearField_(nearField) + , kernelApproximations_(kernelApproximations) + , basisMatrix_(basisMatrix) + , transferMatrices_(transferMatrices) + , params_(params) { + auto map = nearField_->getDomainMap(); + clusterCoeffMap_ = basisMatrix_->getDomainMap(); + + bool setupTransposes; + bool doDebugChecks; + std::string sendTypeNearField; + std::string sendTypeBasisMatrix; + std::string sendTypeKernelApproximations; + + Teuchos::ParameterList defaultParams("Default params"); + defaultParams.set("setupTransposes", true); + defaultParams.set("doDebugChecks", true); + defaultParams.set("Send type nearField", "Isend"); + defaultParams.set("Send type basisMatrix", "Isend"); + defaultParams.set("Send type kernelApproximations", "Alltoall"); + defaultParams.set("Coarsening criterion", "transferLevels"); + defaultParams.set("debugOutput", false); + defaultParams.set("keepTransfers", -1); + defaultParams.set("treeCoarseningFactor", 2.0); + defaultParams.set("leftOverFactor", 1.0); + if (params_.is_null()) + params_ = Teuchos::rcp(new Teuchos::ParameterList("")); + params_->validateParametersAndSetDefaults(defaultParams); + + setupTransposes = params_->get("setupTransposes"); + doDebugChecks = params_->get("doDebugChecks"); + sendTypeNearField = params_->get("Send type nearField"); + sendTypeBasisMatrix = params_->get("Send type basisMatrix"); + sendTypeKernelApproximations = params_->get("Send type kernelApproximations"); + coarseningCriterion_ = params_->get("Coarsening criterion"); + TEUCHOS_ASSERT((coarseningCriterion_ == "numClusters") || (coarseningCriterion_ == "equivalentDense") || (coarseningCriterion_ == "transferLevels")); + debugOutput_ = params_->get("debugOutput"); + + if (doDebugChecks) { + // near field matrix lives on map and is nonlocal + TEUCHOS_ASSERT(map->isSameAs(*nearField_->getRangeMap())); + TEUCHOS_ASSERT(map->isSameAs(*nearField_->getRowMap())); + + // basis matrix is entirely local and maps from 
clusterCoeffMap_ to map. + TEUCHOS_ASSERT(map->isSameAs(*basisMatrix->getRangeMap())); + TEUCHOS_ASSERT(map->isSameAs(*basisMatrix->getRowMap())); + // TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*basisMatrix->getDomainMap())); + + // kernel approximations live on clusterCoeffMap and are nonlocal + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*kernelApproximations_->pointA_->getDomainMap())); + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*kernelApproximations_->pointA_->getRangeMap())); + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*kernelApproximations_->pointA_->getRowMap())); + + for (size_t i = 0; i < transferMatrices_.size(); i++) { + // transfer matrices are entirely local, block diagonal on clusterCoeffMap + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getDomainMap())); + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getColMap())); + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getRowMap())); + TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getRangeMap())); } - identity->fillComplete (); - return identity; } + // Set the send types + Teuchos::RCP distParams = rcp(new Teuchos::ParameterList()); + { + distParams->set("Send type", sendTypeNearField); + Teuchos::RCP > nearFieldImporter = nearField_->getGraph()->getImporter(); + nearFieldImporter->getDistributor().setParameterList(distParams); + auto revDistor = nearFieldImporter->getDistributor().getReverse(false); + if (!revDistor.is_null()) + revDistor->setParameterList(distParams); + } - template - HierarchicalOperator:: - HierarchicalOperator(const Teuchos::RCP& nearField, - const Teuchos::RCP& kernelApproximations, - const Teuchos::RCP& basisMatrix, - std::vector >& transferMatrices, - const Teuchos::RCP& params) - : - nearField_(nearField), - kernelApproximations_(kernelApproximations), - basisMatrix_(basisMatrix), - transferMatrices_(transferMatrices), - params_(params) - { - auto map = 
nearField_->getDomainMap(); - clusterCoeffMap_ = basisMatrix_->getDomainMap(); - - bool setupTransposes; - bool doDebugChecks; - std::string sendTypeNearField; - std::string sendTypeBasisMatrix; - std::string sendTypeKernelApproximations; - - Teuchos::ParameterList defaultParams("Default params"); - defaultParams.set("setupTransposes", true); - defaultParams.set("doDebugChecks", true); - defaultParams.set("Send type nearField", "Isend"); - defaultParams.set("Send type basisMatrix", "Isend"); - defaultParams.set("Send type kernelApproximations", "Alltoall"); - defaultParams.set("Coarsening criterion", "transferLevels"); - defaultParams.set("debugOutput", false); - defaultParams.set("keepTransfers", -1); - defaultParams.set("treeCoarseningFactor", 2.0); - defaultParams.set("leftOverFactor", 1.0); - if (params_.is_null()) - params_ = Teuchos::rcp(new Teuchos::ParameterList("")); - params_->validateParametersAndSetDefaults(defaultParams); - - setupTransposes = params_->get("setupTransposes"); - doDebugChecks = params_->get("doDebugChecks"); - sendTypeNearField = params_->get("Send type nearField"); - sendTypeBasisMatrix = params_->get("Send type basisMatrix"); - sendTypeKernelApproximations = params_->get("Send type kernelApproximations"); - coarseningCriterion_ = params_->get("Coarsening criterion"); - TEUCHOS_ASSERT((coarseningCriterion_ == "numClusters") || (coarseningCriterion_ == "equivalentDense") || (coarseningCriterion_ == "transferLevels")); - debugOutput_ = params_->get("debugOutput"); - - if (doDebugChecks) { - // near field matrix lives on map and is nonlocal - TEUCHOS_ASSERT(map->isSameAs(*nearField_->getRangeMap())); - TEUCHOS_ASSERT(map->isSameAs(*nearField_->getRowMap())); - - // basis matrix is entirely local and maps from clusterCoeffMap_ to map. 
- TEUCHOS_ASSERT(map->isSameAs(*basisMatrix->getRangeMap())); - TEUCHOS_ASSERT(map->isSameAs(*basisMatrix->getRowMap())); - // TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*basisMatrix->getDomainMap())); - - // kernel approximations live on clusterCoeffMap and are nonlocal - TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*kernelApproximations_->pointA_->getDomainMap())); - TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*kernelApproximations_->pointA_->getRangeMap())); - TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*kernelApproximations_->pointA_->getRowMap())); - - for (size_t i = 0; iisSameAs(*transferMatrices_[i]->pointA_->getDomainMap())); - TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getColMap())); - TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getRowMap())); - TEUCHOS_ASSERT(clusterCoeffMap_->isSameAs(*transferMatrices_[i]->pointA_->getRangeMap())); - } - } - - // Set the send types - Teuchos::RCP distParams = rcp(new Teuchos::ParameterList()); - { - distParams->set("Send type", sendTypeNearField); - Teuchos::RCP > nearFieldImporter = nearField_->getGraph()->getImporter(); - nearFieldImporter->getDistributor().setParameterList(distParams); - auto revDistor = nearFieldImporter->getDistributor().getReverse(false); - if (!revDistor.is_null()) - revDistor->setParameterList(distParams); - } - - { - distParams->set("Send type", sendTypeBasisMatrix); - Teuchos::RCP > basisMatrixImporter = basisMatrix_->getGraph()->getImporter(); - if (!basisMatrixImporter.is_null()) { - basisMatrixImporter->getDistributor().setParameterList(distParams); - auto revDistor = basisMatrixImporter->getDistributor().getReverse(false); - if (!revDistor.is_null()) - revDistor->setParameterList(distParams); - } - } - - { - distParams->set("Send type", sendTypeKernelApproximations); - Teuchos::RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); - 
kernelApproximationsImporter->getDistributor().setParameterList(distParams); - auto revDistor = kernelApproximationsImporter->getDistributor().getReverse(false); - if (!revDistor.is_null()) - revDistor->setParameterList(distParams); - } - - if (setupTransposes) { - Teuchos::RCP transposeParams = rcp(new Teuchos::ParameterList); - - Tpetra::RowMatrixTransposer transposerBasisMatrix(basisMatrix_); - basisMatrixT_ = transposerBasisMatrix.createTranspose(transposeParams); - - for (size_t i = 0; iset("Send type", sendTypeBasisMatrix); + Teuchos::RCP > basisMatrixImporter = basisMatrix_->getGraph()->getImporter(); + if (!basisMatrixImporter.is_null()) { + basisMatrixImporter->getDistributor().setParameterList(distParams); + auto revDistor = basisMatrixImporter->getDistributor().getReverse(false); + if (!revDistor.is_null()) + revDistor->setParameterList(distParams); } + } - template - void - HierarchicalOperator:: - apply(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, - Scalar alpha, - Scalar beta) const { - if (canApplyWithoutTransposes_) - applyWithoutTransposes(X, Y, mode, alpha, beta); - else - applyWithTransposes(X, Y, mode, alpha, beta); + { + distParams->set("Send type", sendTypeKernelApproximations); + Teuchos::RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); + kernelApproximationsImporter->getDistributor().setParameterList(distParams); + auto revDistor = kernelApproximationsImporter->getDistributor().getReverse(false); + if (!revDistor.is_null()) + revDistor->setParameterList(distParams); } - template - void - HierarchicalOperator:: - applyWithTransposes(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, - Scalar alpha, - Scalar beta) const { - using Teuchos::RCP; - const Scalar one = Teuchos::ScalarTraits::one(); - const Scalar zero = Teuchos::ScalarTraits::zero(); - bool flip = true; - - allocateMemory(X.getNumVectors()); - - // upward pass - { - 
Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("upward pass"))); + if (setupTransposes) { + Teuchos::RCP transposeParams = rcp(new Teuchos::ParameterList); - basisMatrix_->apply(X, *coefficients_, Teuchos::TRANS); + Tpetra::RowMatrixTransposer transposerBasisMatrix(basisMatrix_); + basisMatrixT_ = transposerBasisMatrix.createTranspose(transposeParams); - for (int i = Teuchos::as(transferMatrices_.size())-1; i>=0; i--) - if (flip) { - coefficients2_->assign(*coefficients_); - transferMatrices_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::NO_TRANS, one, one); - flip = false; - } else { - coefficients_->assign(*coefficients2_); - transferMatrices_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::NO_TRANS, one, one); - flip = true; - } + for (size_t i = 0; i < transferMatrices_.size(); i++) { + transferMatricesT_.push_back(transpose(transferMatrices_[i])); } - // far field interactions - part 1 - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 1"))); + canApplyWithoutTransposes_ = true; + } else + canApplyWithoutTransposes_ = false; + + // Allocate memory for apply with vectors + allocateMemory(1); +} + +template +void HierarchicalOperator:: + apply(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, + Scalar alpha, + Scalar beta) const { + if (canApplyWithoutTransposes_) + applyWithoutTransposes(X, Y, mode, alpha, beta); + else + applyWithTransposes(X, Y, mode, alpha, beta); +} - RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); +template +void HierarchicalOperator:: + applyWithTransposes(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, + Scalar alpha, + Scalar beta) const { + using Teuchos::RCP; + const Scalar one = Teuchos::ScalarTraits::one(); + const Scalar zero = Teuchos::ScalarTraits::zero(); + bool flip = true; + + allocateMemory(X.getNumVectors()); + + // upward pass + { + 
Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("upward pass"))); + + basisMatrix_->apply(X, *coefficients_, Teuchos::TRANS); + + for (int i = Teuchos::as(transferMatrices_.size()) - 1; i >= 0; i--) if (flip) { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->beginImport(*coefficients_, *kernelApproximationsImporter, INSERT); - } else if (mode == Teuchos::TRANS) { - kernelApproximations_->localApply(*coefficients_, *coefficients_colmap_, mode, alpha); - coefficients2_->putScalar(zero); - coefficients2_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } + coefficients2_->assign(*coefficients_); + transferMatrices_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::NO_TRANS, one, one); + flip = false; } else { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->beginImport(*coefficients2_, *kernelApproximationsImporter, INSERT); - } else if (mode == Teuchos::TRANS) { - kernelApproximations_->localApply(*coefficients2_, *coefficients_colmap_, mode, alpha); - coefficients_->putScalar(zero); - coefficients_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } + coefficients_->assign(*coefficients2_); + transferMatrices_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::NO_TRANS, one, one); + flip = true; } - } + } - // near field - part 1 - RCP > nearFieldImporter = nearField_->getGraph()->getImporter(); - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 1"))); + // far field interactions - part 1 + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 1"))); + + RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); + if (flip) { if (mode == Teuchos::NO_TRANS) { - X_colmap_->beginImport(X, *nearFieldImporter, INSERT); + coefficients_colmap_->beginImport(*coefficients_, *kernelApproximationsImporter, INSERT); } else if (mode == Teuchos::TRANS) { 
- nearField_->localApply(X, *X_colmap_, mode, alpha, zero); - Y.scale (beta); - Y.beginExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); + kernelApproximations_->localApply(*coefficients_, *coefficients_colmap_, mode, alpha); + coefficients2_->putScalar(zero); + coefficients2_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); + } + } else { + if (mode == Teuchos::NO_TRANS) { + coefficients_colmap_->beginImport(*coefficients2_, *kernelApproximationsImporter, INSERT); + } else if (mode == Teuchos::TRANS) { + kernelApproximations_->localApply(*coefficients2_, *coefficients_colmap_, mode, alpha); + coefficients_->putScalar(zero); + coefficients_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); } } + } - // far field interactions - part 2 - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 2"))); - - RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); - if (flip) { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->endImport(*coefficients_, *kernelApproximationsImporter, INSERT); - kernelApproximations_->localApply(*coefficients_colmap_, *coefficients2_, mode, alpha); - } else if (mode == Teuchos::TRANS) { - coefficients2_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } - } else { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->endImport(*coefficients2_, *kernelApproximationsImporter, INSERT); - kernelApproximations_->localApply(*coefficients_colmap_, *coefficients_, mode, alpha); - } else if (mode == Teuchos::TRANS) { - coefficients_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } - } + // near field - part 1 + RCP > nearFieldImporter = nearField_->getGraph()->getImporter(); + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 1"))); + if (mode == Teuchos::NO_TRANS) { + X_colmap_->beginImport(X, 
*nearFieldImporter, INSERT); + } else if (mode == Teuchos::TRANS) { + nearField_->localApply(X, *X_colmap_, mode, alpha, zero); + Y.scale(beta); + Y.beginExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); } + } - // near field - part 2 - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 2"))); + // far field interactions - part 2 + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 2"))); + RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); + if (flip) { if (mode == Teuchos::NO_TRANS) { - X_colmap_->endImport(X, *nearFieldImporter, INSERT); - nearField_->localApply(*X_colmap_, Y, mode, alpha, beta); + coefficients_colmap_->endImport(*coefficients_, *kernelApproximationsImporter, INSERT); + kernelApproximations_->localApply(*coefficients_colmap_, *coefficients2_, mode, alpha); } else if (mode == Teuchos::TRANS) { - Y.endExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); + coefficients2_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); + } + } else { + if (mode == Teuchos::NO_TRANS) { + coefficients_colmap_->endImport(*coefficients2_, *kernelApproximationsImporter, INSERT); + kernelApproximations_->localApply(*coefficients_colmap_, *coefficients_, mode, alpha); + } else if (mode == Teuchos::TRANS) { + coefficients_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); } } + } - // downward pass - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("downward pass"))); + // near field - part 2 + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 2"))); - for (size_t i = 0; iassign(*coefficients2_); - transferMatrices_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::TRANS, one, one); - flip = false; - } else { - coefficients2_->assign(*coefficients_); - transferMatrices_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::TRANS, 
one, one); - flip = true; - } - if (flip) - basisMatrix_->apply(*coefficients2_, Y, Teuchos::NO_TRANS, one, one); - else - basisMatrix_->apply(*coefficients_, Y, Teuchos::NO_TRANS, one, one); + if (mode == Teuchos::NO_TRANS) { + X_colmap_->endImport(X, *nearFieldImporter, INSERT); + nearField_->localApply(*X_colmap_, Y, mode, alpha, beta); + } else if (mode == Teuchos::TRANS) { + Y.endExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); } } - template - void - HierarchicalOperator:: - applyWithoutTransposes(const Tpetra::MultiVector& X, - Tpetra::MultiVector& Y, - Teuchos::ETransp mode, - Scalar alpha, - Scalar beta) const { - using Teuchos::RCP; - const Scalar one = Teuchos::ScalarTraits::one(); - const Scalar zero = Teuchos::ScalarTraits::zero(); - bool flip = true; - - allocateMemory(X.getNumVectors()); - - // upward pass - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("upward pass"))); - - basisMatrixT_->apply(X, *coefficients_, Teuchos::NO_TRANS); + // downward pass + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("downward pass"))); - for (int i = Teuchos::as(transferMatrices_.size())-1; i>=0; i--) - if (flip) { - coefficients2_->assign(*coefficients_); - transferMatrices_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::NO_TRANS, one, one); - flip = false; - } else { - coefficients_->assign(*coefficients2_); - transferMatrices_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::NO_TRANS, one, one); - flip = true; - } - } - - // far field interactions - part 1 - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 1"))); + for (size_t i = 0; i < transferMatrices_.size(); i++) + if (flip) { + coefficients_->assign(*coefficients2_); + transferMatrices_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::TRANS, one, one); + flip = false; + } else { + coefficients2_->assign(*coefficients_); + transferMatrices_[i]->localApply(*coefficients_, *coefficients2_, 
Teuchos::TRANS, one, one); + flip = true; + } + if (flip) + basisMatrix_->apply(*coefficients2_, Y, Teuchos::NO_TRANS, one, one); + else + basisMatrix_->apply(*coefficients_, Y, Teuchos::NO_TRANS, one, one); + } +} - RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); +template +void HierarchicalOperator:: + applyWithoutTransposes(const Tpetra::MultiVector& X, + Tpetra::MultiVector& Y, + Teuchos::ETransp mode, + Scalar alpha, + Scalar beta) const { + using Teuchos::RCP; + const Scalar one = Teuchos::ScalarTraits::one(); + const Scalar zero = Teuchos::ScalarTraits::zero(); + bool flip = true; + + allocateMemory(X.getNumVectors()); + + // upward pass + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("upward pass"))); + + basisMatrixT_->apply(X, *coefficients_, Teuchos::NO_TRANS); + + for (int i = Teuchos::as(transferMatrices_.size()) - 1; i >= 0; i--) if (flip) { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->beginImport(*coefficients_, *kernelApproximationsImporter, INSERT); - } else if (mode == Teuchos::TRANS) { - kernelApproximations_->localApply(*coefficients_, *coefficients_colmap_, mode, alpha); - coefficients2_->putScalar(zero); - coefficients2_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } + coefficients2_->assign(*coefficients_); + transferMatrices_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::NO_TRANS, one, one); + flip = false; } else { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->beginImport(*coefficients2_, *kernelApproximationsImporter, INSERT); - } else if (mode == Teuchos::TRANS) { - kernelApproximations_->localApply(*coefficients2_, *coefficients_colmap_, mode, alpha); - coefficients_->putScalar(zero); - coefficients_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } + coefficients_->assign(*coefficients2_); + transferMatrices_[i]->localApply(*coefficients2_, 
*coefficients_, Teuchos::NO_TRANS, one, one); + flip = true; } - } + } - // near field - part 1 - RCP > nearFieldImporter = nearField_->getGraph()->getImporter(); - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 1"))); + // far field interactions - part 1 + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 1"))); + + RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); + if (flip) { if (mode == Teuchos::NO_TRANS) { - X_colmap_->beginImport(X, *nearFieldImporter, INSERT); + coefficients_colmap_->beginImport(*coefficients_, *kernelApproximationsImporter, INSERT); } else if (mode == Teuchos::TRANS) { - nearField_->localApply(X, *X_colmap_, mode, alpha, zero); - Y.scale (beta); - Y.beginExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); + kernelApproximations_->localApply(*coefficients_, *coefficients_colmap_, mode, alpha); + coefficients2_->putScalar(zero); + coefficients2_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); + } + } else { + if (mode == Teuchos::NO_TRANS) { + coefficients_colmap_->beginImport(*coefficients2_, *kernelApproximationsImporter, INSERT); + } else if (mode == Teuchos::TRANS) { + kernelApproximations_->localApply(*coefficients2_, *coefficients_colmap_, mode, alpha); + coefficients_->putScalar(zero); + coefficients_->beginExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); } } + } - // far field interactions - part 2 - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 2"))); - - RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); - if (flip) { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->endImport(*coefficients_, *kernelApproximationsImporter, INSERT); - kernelApproximations_->localApply(*coefficients_colmap_, *coefficients2_, mode, alpha); - } else if (mode == Teuchos::TRANS) { - 
coefficients2_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } - } else { - if (mode == Teuchos::NO_TRANS) { - coefficients_colmap_->endImport(*coefficients2_, *kernelApproximationsImporter, INSERT); - kernelApproximations_->localApply(*coefficients_colmap_, *coefficients_, mode, alpha); - } else if (mode == Teuchos::TRANS) { - coefficients_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); - } - } + // near field - part 1 + RCP > nearFieldImporter = nearField_->getGraph()->getImporter(); + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 1"))); + if (mode == Teuchos::NO_TRANS) { + X_colmap_->beginImport(X, *nearFieldImporter, INSERT); + } else if (mode == Teuchos::TRANS) { + nearField_->localApply(X, *X_colmap_, mode, alpha, zero); + Y.scale(beta); + Y.beginExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); } + } - // near field - part 2 - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 2"))); + // far field interactions - part 2 + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("far field 2"))); + RCP > kernelApproximationsImporter = kernelApproximations_->pointA_->getGraph()->getImporter(); + if (flip) { if (mode == Teuchos::NO_TRANS) { - X_colmap_->endImport(X, *nearFieldImporter, INSERT); - nearField_->localApply(*X_colmap_, Y, mode, alpha, beta); + coefficients_colmap_->endImport(*coefficients_, *kernelApproximationsImporter, INSERT); + kernelApproximations_->localApply(*coefficients_colmap_, *coefficients2_, mode, alpha); } else if (mode == Teuchos::TRANS) { - Y.endExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); + coefficients2_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); + } + } else { + if (mode == Teuchos::NO_TRANS) { + coefficients_colmap_->endImport(*coefficients2_, *kernelApproximationsImporter, INSERT); + 
kernelApproximations_->localApply(*coefficients_colmap_, *coefficients_, mode, alpha); + } else if (mode == Teuchos::TRANS) { + coefficients_->endExport(*coefficients_colmap_, *kernelApproximationsImporter, ADD_ASSIGN); } } + } - // downward pass - { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("downward pass"))); + // near field - part 2 + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("near field 2"))); - for (size_t i = 0; iassign(*coefficients2_); - transferMatricesT_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::NO_TRANS, one, one); - flip = false; - } else { - coefficients2_->assign(*coefficients_); - transferMatricesT_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::NO_TRANS, one, one); - flip = true; - } - if (flip) - basisMatrix_->apply(*coefficients2_, Y, Teuchos::NO_TRANS, one, one); - else - basisMatrix_->apply(*coefficients_, Y, Teuchos::NO_TRANS, one, one); + if (mode == Teuchos::NO_TRANS) { + X_colmap_->endImport(X, *nearFieldImporter, INSERT); + nearField_->localApply(*X_colmap_, Y, mode, alpha, beta); + } else if (mode == Teuchos::TRANS) { + Y.endExport(*X_colmap_, *nearFieldImporter, ADD_ASSIGN); } } - template - Teuchos::RCP > - HierarchicalOperator:: - restrict(const Teuchos::RCP& P) { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Galerkin product"))); - - // H_c = P^T * H * P - using lo_vec_type = typename blocked_map_type::lo_vec_type; - using vec_type = typename Tpetra::Vector; - using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - using Teuchos::RCP; - using Teuchos::rcp; - const Scalar ONE = Teuchos::ScalarTraits::one(); - const Scalar ZERO = Teuchos::ScalarTraits::zero(); - const Scalar HALF = ONE/(ONE+ONE); - - // newBasisMatrix = P^T * basisMatrix - RCP newBasisMatrix = rcp(new matrix_type(P->getDomainMap(), clusterCoeffMap_, 0)); - MatrixMatrix::Multiply(*P, true, *basisMatrix_, false, *newBasisMatrix); - - // - auto 
clusterSizes = kernelApproximations_->blockMap_->blockSizes_; - auto ghosted_clusterMap = kernelApproximations_->blockA_->getColMap(); - auto ghosted_clusterSizes = kernelApproximations_->ghosted_blockMap_->blockSizes_; - - // Get number of unknowns associated with each cluster via new basisMatrix. - // numUnknownsPerCluster = \prod transfer_k * graph(newBasisMatrix)^T * ones - RCP numUnknownsPerCluster; - RCP ghosted_numUnknownsPerCluster; - if ((coarseningCriterion_ == "equivalentDense") || - (coarseningCriterion_ == "numClusters")) { - { - numUnknownsPerCluster = rcp(new vec_type(kernelApproximations_->blockA_->getRowMap(), false)); - auto lcl_clusterSizes = clusterSizes->getLocalViewHost(Tpetra::Access::ReadOnly); - auto lcl_numUnknownsPerCluster = numUnknownsPerCluster->getLocalViewHost(Tpetra::Access::OverwriteAll); - // Compute the transpose of the newBasisMatrix. - RCP > newBasisMatrixT; - Tpetra::RowMatrixTransposer transposer(newBasisMatrix); - RCP transposeParams = rcp(new Teuchos::ParameterList); - newBasisMatrixT = transposer.createTranspose(transposeParams); - - // TODO: parallel_for - auto rowptr = newBasisMatrixT->getLocalRowPtrsHost(); - LocalOrdinal clusterStart = 0; - LocalOrdinal clusterEnd = 0; - for (LocalOrdinal cluster = 0; cluster < lcl_clusterSizes.extent_int(0); ++cluster) { - clusterStart = clusterEnd; - clusterEnd += lcl_clusterSizes(cluster, 0); - LocalOrdinal maxEntries = 0; - for (LocalOrdinal row = clusterStart; row < clusterEnd; ++row) { - LocalOrdinal numEntriesPerRow = rowptr(row+1)-rowptr(row); - maxEntries = std::max(maxEntries, numEntriesPerRow); - } - lcl_numUnknownsPerCluster(cluster, 0) = maxEntries; - } - TEUCHOS_ASSERT_EQUALITY(clusterEnd+1, rowptr.extent_int(0)); - } + // downward pass + { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("downward pass"))); + for (size_t i = 0; i < transferMatrices_.size(); i++) + if (flip) { + coefficients_->assign(*coefficients2_); + 
transferMatricesT_[i]->localApply(*coefficients2_, *coefficients_, Teuchos::NO_TRANS, one, one); + flip = false; + } else { + coefficients2_->assign(*coefficients_); + transferMatricesT_[i]->localApply(*coefficients_, *coefficients2_, Teuchos::NO_TRANS, one, one); + flip = true; + } + if (flip) + basisMatrix_->apply(*coefficients2_, Y, Teuchos::NO_TRANS, one, one); + else + basisMatrix_->apply(*coefficients_, Y, Teuchos::NO_TRANS, one, one); + } +} - // sum from child nodes to parents via transfer operators - for (int i = Teuchos::as(transferMatrices_.size())-1; i>=0; i--) - transferMatrices_[i]->blockA_->apply(*numUnknownsPerCluster, *numUnknownsPerCluster, Teuchos::NO_TRANS, ONE, ONE); +template +Teuchos::RCP > + HierarchicalOperator:: + restrict(const Teuchos::RCP& P) { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Galerkin product"))); + + // H_c = P^T * H * P + using lo_vec_type = typename blocked_map_type::lo_vec_type; + using vec_type = typename Tpetra::Vector; + using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + using Teuchos::RCP; + using Teuchos::rcp; + const Scalar ONE = Teuchos::ScalarTraits::one(); + const Scalar ZERO = Teuchos::ScalarTraits::zero(); + const Scalar HALF = ONE / (ONE + ONE); + + // newBasisMatrix = P^T * basisMatrix + RCP newBasisMatrix = rcp(new matrix_type(P->getDomainMap(), clusterCoeffMap_, 0)); + MatrixMatrix::Multiply(*P, true, *basisMatrix_, false, *newBasisMatrix); + + // + auto clusterSizes = kernelApproximations_->blockMap_->blockSizes_; + auto ghosted_clusterMap = kernelApproximations_->blockA_->getColMap(); + auto ghosted_clusterSizes = kernelApproximations_->ghosted_blockMap_->blockSizes_; + + // Get number of unknowns associated with each cluster via new basisMatrix. 
+ // numUnknownsPerCluster = \prod transfer_k * graph(newBasisMatrix)^T * ones + RCP numUnknownsPerCluster; + RCP ghosted_numUnknownsPerCluster; + if ((coarseningCriterion_ == "equivalentDense") || + (coarseningCriterion_ == "numClusters")) { + { + numUnknownsPerCluster = rcp(new vec_type(kernelApproximations_->blockA_->getRowMap(), false)); + auto lcl_clusterSizes = clusterSizes->getLocalViewHost(Tpetra::Access::ReadOnly); + auto lcl_numUnknownsPerCluster = numUnknownsPerCluster->getLocalViewHost(Tpetra::Access::OverwriteAll); + // Compute the transpose of the newBasisMatrix. + RCP > newBasisMatrixT; + Tpetra::RowMatrixTransposer transposer(newBasisMatrix); + RCP transposeParams = rcp(new Teuchos::ParameterList); + newBasisMatrixT = transposer.createTranspose(transposeParams); - // get ghosted numUnknownsPerCluster - ghosted_numUnknownsPerCluster = rcp(new vec_type(ghosted_clusterMap, false)); - auto import = kernelApproximations_->blockA_->getCrsGraph()->getImporter(); - ghosted_numUnknownsPerCluster->doImport(*numUnknownsPerCluster, *import, Tpetra::INSERT); + // TODO: parallel_for + auto rowptr = newBasisMatrixT->getLocalRowPtrsHost(); + LocalOrdinal clusterStart = 0; + LocalOrdinal clusterEnd = 0; + for (LocalOrdinal cluster = 0; cluster < lcl_clusterSizes.extent_int(0); ++cluster) { + clusterStart = clusterEnd; + clusterEnd += lcl_clusterSizes(cluster, 0); + LocalOrdinal maxEntries = 0; + for (LocalOrdinal row = clusterStart; row < clusterEnd; ++row) { + LocalOrdinal numEntriesPerRow = rowptr(row + 1) - rowptr(row); + maxEntries = std::max(maxEntries, numEntriesPerRow); + } + lcl_numUnknownsPerCluster(cluster, 0) = maxEntries; + } + TEUCHOS_ASSERT_EQUALITY(clusterEnd + 1, rowptr.extent_int(0)); } - // coarse cluster pair graph - RCP newKernelBlockGraph = rcp(new matrix_type(kernelApproximations_->blockA_->getCrsGraph())); - newKernelBlockGraph->resumeFill(); - // point entries of cluster pairs that should be moved to the near field - RCP diffKernelApprox = 
rcp(new matrix_type(kernelApproximations_->pointA_->getCrsGraph())); + // sum from child nodes to parents via transfer operators + for (int i = Teuchos::as(transferMatrices_.size()) - 1; i >= 0; i--) + transferMatrices_[i]->blockA_->apply(*numUnknownsPerCluster, *numUnknownsPerCluster, Teuchos::NO_TRANS, ONE, ONE); - // Determine which cluster pairs should be moved to the near field. - // We are constructing the coarse block matrix newKernelBlockGraph - // and the point matrix diffKernelApprox. - { - typename vec_type::dual_view_type::t_host::const_type lcl_numUnknownsPerCluster; - typename vec_type::dual_view_type::t_host::const_type lcl_ghosted_numUnknownsPerCluster; - auto lcl_offsets = Kokkos::create_mirror_view(kernelApproximations_->blockMap_->offsets_); - auto lcl_ghosted_offsets = Kokkos::create_mirror_view(kernelApproximations_->ghosted_blockMap_->offsets_); - Kokkos::deep_copy(lcl_offsets, kernelApproximations_->blockMap_->offsets_); - Kokkos::deep_copy(lcl_ghosted_offsets, kernelApproximations_->ghosted_blockMap_->offsets_); - auto lcl_clusterSizes = clusterSizes->getLocalViewHost(Tpetra::Access::ReadOnly); - auto lcl_ghosted_clusterSizes = ghosted_clusterSizes->getLocalViewHost(Tpetra::Access::ReadOnly); - - if ((coarseningCriterion_ == "equivalentDense") || + // get ghosted numUnknownsPerCluster + ghosted_numUnknownsPerCluster = rcp(new vec_type(ghosted_clusterMap, false)); + auto import = kernelApproximations_->blockA_->getCrsGraph()->getImporter(); + ghosted_numUnknownsPerCluster->doImport(*numUnknownsPerCluster, *import, Tpetra::INSERT); + } + + // coarse cluster pair graph + RCP newKernelBlockGraph = rcp(new matrix_type(kernelApproximations_->blockA_->getCrsGraph())); + newKernelBlockGraph->resumeFill(); + // point entries of cluster pairs that should be moved to the near field + RCP diffKernelApprox = rcp(new matrix_type(kernelApproximations_->pointA_->getCrsGraph())); + + // Determine which cluster pairs should be moved to the near field. 
+ // We are constructing the coarse block matrix newKernelBlockGraph + // and the point matrix diffKernelApprox. + { + typename vec_type::dual_view_type::t_host::const_type lcl_numUnknownsPerCluster; + typename vec_type::dual_view_type::t_host::const_type lcl_ghosted_numUnknownsPerCluster; + auto lcl_offsets = Kokkos::create_mirror_view(kernelApproximations_->blockMap_->offsets_); + auto lcl_ghosted_offsets = Kokkos::create_mirror_view(kernelApproximations_->ghosted_blockMap_->offsets_); + Kokkos::deep_copy(lcl_offsets, kernelApproximations_->blockMap_->offsets_); + Kokkos::deep_copy(lcl_ghosted_offsets, kernelApproximations_->ghosted_blockMap_->offsets_); + auto lcl_clusterSizes = clusterSizes->getLocalViewHost(Tpetra::Access::ReadOnly); + auto lcl_ghosted_clusterSizes = ghosted_clusterSizes->getLocalViewHost(Tpetra::Access::ReadOnly); + + if ((coarseningCriterion_ == "equivalentDense") || (coarseningCriterion_ == "numClusters")) { - lcl_numUnknownsPerCluster = numUnknownsPerCluster->getLocalViewHost(Tpetra::Access::ReadOnly); - lcl_ghosted_numUnknownsPerCluster = ghosted_numUnknownsPerCluster->getLocalViewHost(Tpetra::Access::ReadOnly); - } + lcl_numUnknownsPerCluster = numUnknownsPerCluster->getLocalViewHost(Tpetra::Access::ReadOnly); + lcl_ghosted_numUnknownsPerCluster = ghosted_numUnknownsPerCluster->getLocalViewHost(Tpetra::Access::ReadOnly); + } - // Criterion: "numClusters" - // Compute all cluster pair sizes, sort them, and pick cut-off - // so that the number of cluster pairs decreases propotionally - // to the number of unknowns. 
- size_t tgt_clusterPairSize = 0; - if (coarseningCriterion_ == "numClusters") { - auto lcl_BlockGraph = kernelApproximations_->blockA_->getLocalMatrixHost(); - std::vector clusterPairSizes; - for (LocalOrdinal brlid = 0; brlid < lcl_BlockGraph.numRows(); ++brlid) { - auto brow = lcl_BlockGraph.row(brlid); - for (LocalOrdinal k = 0; k < brow.length; ++k) { - // Entries of the block matrix for kernelApproximations - // decide whether the cluster pair is present and only take - // values 1 or 0. - if (brow.value(k) > HALF) { - LocalOrdinal bclid = brow.colidx(k); - clusterPairSizes.push_back(lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0)); - } + // Criterion: "numClusters" + // Compute all cluster pair sizes, sort them, and pick cut-off + // so that the number of cluster pairs decreases propotionally + // to the number of unknowns. + size_t tgt_clusterPairSize = 0; + if (coarseningCriterion_ == "numClusters") { + auto lcl_BlockGraph = kernelApproximations_->blockA_->getLocalMatrixHost(); + std::vector clusterPairSizes; + for (LocalOrdinal brlid = 0; brlid < lcl_BlockGraph.numRows(); ++brlid) { + auto brow = lcl_BlockGraph.row(brlid); + for (LocalOrdinal k = 0; k < brow.length; ++k) { + // Entries of the block matrix for kernelApproximations + // decide whether the cluster pair is present and only take + // values 1 or 0. 
+ if (brow.value(k) > HALF) { + LocalOrdinal bclid = brow.colidx(k); + clusterPairSizes.push_back(lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0)); } } - std::sort(clusterPairSizes.begin(), clusterPairSizes.end()); - double coarseningRate = Teuchos::as(P->getGlobalNumCols())/Teuchos::as(P->getGlobalNumRows()); - tgt_clusterPairSize = clusterPairSizes[Teuchos::as(clusterPairSizes.size()*(1-coarseningRate))]; - // std::cout << "HERE " << clusterPairSizes[0] << " " << tgt_clusterPairSize << " " << clusterPairSizes[clusterPairSizes.size()-1] << std::endl; } + std::sort(clusterPairSizes.begin(), clusterPairSizes.end()); + double coarseningRate = Teuchos::as(P->getGlobalNumCols()) / Teuchos::as(P->getGlobalNumRows()); + tgt_clusterPairSize = clusterPairSizes[Teuchos::as(clusterPairSizes.size() * (1 - coarseningRate))]; + // std::cout << "HERE " << clusterPairSizes[0] << " " << tgt_clusterPairSize << " " << clusterPairSizes[clusterPairSizes.size()-1] << std::endl; + } - // Criterion: "transferLevels" - // Drop cluster pairs by level in the tree. 
- auto comm = getComm(); - std::set blidsToDrop; - if (coarseningCriterion_ == "transferLevels") { - double coarseningRate = Teuchos::as(P->getGlobalNumCols())/Teuchos::as(P->getGlobalNumRows()); - size_t droppedClusterPairs = 0; - size_t totalNumClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); - RCP tempV = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); - RCP tempV2 = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); - int keepTransfers = params_->get("keepTransfers",-1); - if (keepTransfers == -1) { - double leftOverFactor = params_->get("leftOverFactor"); - keepTransfers = transferMatrices_.size(); - double temp = (1.0 / coarseningRate) * leftOverFactor; - const double treeCoarseningFactor = params_->get("treeCoarseningFactor"); - while (temp >= 2.0) { - --keepTransfers; - temp /= treeCoarseningFactor; - } - keepTransfers = std::max(keepTransfers, 0); - params_->set("leftOverFactor", temp); + // Criterion: "transferLevels" + // Drop cluster pairs by level in the tree. 
+ auto comm = getComm(); + std::set blidsToDrop; + if (coarseningCriterion_ == "transferLevels") { + double coarseningRate = Teuchos::as(P->getGlobalNumCols()) / Teuchos::as(P->getGlobalNumRows()); + size_t droppedClusterPairs = 0; + size_t totalNumClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); + RCP tempV = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); + RCP tempV2 = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); + int keepTransfers = params_->get("keepTransfers", -1); + if (keepTransfers == -1) { + double leftOverFactor = params_->get("leftOverFactor"); + keepTransfers = transferMatrices_.size(); + double temp = (1.0 / coarseningRate) * leftOverFactor; + const double treeCoarseningFactor = params_->get("treeCoarseningFactor"); + while (temp >= 2.0) { + --keepTransfers; + temp /= treeCoarseningFactor; } + keepTransfers = std::max(keepTransfers, 0); + params_->set("leftOverFactor", temp); + } - for (int k = Teuchos::as(transferMatrices_.size())-1; k>=0; --k) { - - size_t clustersInLevel = transferMatrices_[k]->blockA_->getGlobalNumEntries(); + for (int k = Teuchos::as(transferMatrices_.size()) - 1; k >= 0; --k) { + size_t clustersInLevel = transferMatrices_[k]->blockA_->getGlobalNumEntries(); - if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "level " << k << " clustersInLevel " << clustersInLevel << std::endl; + if (debugOutput_ && (comm->getRank() == 0)) + std::cout << "level " << k << " clustersInLevel " << clustersInLevel << std::endl; - tempV->putScalar(ONE); - transferMatrices_[k]->blockA_->apply(*tempV, *tempV2, Teuchos::TRANS); + tempV->putScalar(ONE); + transferMatrices_[k]->blockA_->apply(*tempV, *tempV2, Teuchos::TRANS); - size_t numClusters = tempV2->norm1(); - if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "numClusters " << numClusters << std::endl; - tempV->putScalar(ZERO); - kernelApproximations_->blockA_->apply(*tempV2, *tempV); + size_t 
numClusters = tempV2->norm1(); + if (debugOutput_ && (comm->getRank() == 0)) + std::cout << "numClusters " << numClusters << std::endl; + tempV->putScalar(ZERO); + kernelApproximations_->blockA_->apply(*tempV2, *tempV); - Scalar numClusterPairs = tempV->dot(*tempV2); - if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "numClusterPairs " << numClusterPairs << std::endl; + Scalar numClusterPairs = tempV->dot(*tempV2); + if (debugOutput_ && (comm->getRank() == 0)) + std::cout << "numClusterPairs " << numClusterPairs << std::endl; - bool doDrop; - if (keepTransfers >= 0) { - doDrop = (keepTransfers<=k); - } else { - doDrop = (droppedClusterPairs + numClusterPairs < (1.0-coarseningRate) * totalNumClusterPairs); - } - if (doDrop) { - auto lcl_transfer = transferMatrices_[k]->blockA_->getLocalMatrixHost(); - auto lcl_transfer_graph = lcl_transfer.graph; - for (LocalOrdinal j = 0; j < lcl_transfer_graph.entries.extent_int(0); j++) - blidsToDrop.insert(lcl_transfer_graph.entries(j)); + bool doDrop; + if (keepTransfers >= 0) { + doDrop = (keepTransfers <= k); + } else { + doDrop = (droppedClusterPairs + numClusterPairs < (1.0 - coarseningRate) * totalNumClusterPairs); + } + if (doDrop) { + auto lcl_transfer = transferMatrices_[k]->blockA_->getLocalMatrixHost(); + auto lcl_transfer_graph = lcl_transfer.graph; + for (LocalOrdinal j = 0; j < lcl_transfer_graph.entries.extent_int(0); j++) + blidsToDrop.insert(lcl_transfer_graph.entries(j)); - droppedClusterPairs += numClusterPairs; - } else { - if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "Dropped " << transferMatrices_.size()-1-k << " transfers of " << transferMatrices_.size() << " dropped cp: " << droppedClusterPairs <getRank() == 0)) + std::cout << "Dropped " << transferMatrices_.size() - 1 - k << " transfers of " << transferMatrices_.size() << " dropped cp: " << droppedClusterPairs << std::endl; + break; } } + } - // number of cluster pairs dropped - int dropped = 0; - // number of cluster pairs we 
kept - int kept = 0; - // number of cluster pairs that were no longer present - int ignored = 0; - // loop over cluster pairs - // TODO: parallel_for - auto lcl_BlockGraph = kernelApproximations_->blockA_->getLocalMatrixHost(); - auto lcl_newBlockGraph = newKernelBlockGraph->getLocalMatrixHost(); - auto lcl_KernelApprox = kernelApproximations_->pointA_->getLocalMatrixHost(); - auto lcl_diffKernelApprox = diffKernelApprox->getLocalMatrixHost(); - for (LocalOrdinal brlid = 0; brlid < lcl_BlockGraph.numRows(); ++brlid) { - size_t brsize = lcl_clusterSizes(brlid, 0); - auto brow = lcl_BlockGraph.row(brlid); - auto new_brow = lcl_newBlockGraph.row(brlid); - for (LocalOrdinal k = 0; k < brow.length; ++k) { - // Entries of the block matrix for kernelApproximations - // decide whether the cluster pair is present and only take - // values 1 or 0. - if (brow.value(k) > HALF) { - LocalOrdinal bclid = brow.colidx(k); - size_t bcsize = lcl_ghosted_clusterSizes(bclid, 0); - - // criterium for removing a cluster pair from the far field - bool removeCluster = false; - if (coarseningCriterion_ == "equivalentDense") { - // Size of the sparse cluster approximation >= size of dense equivalent - removeCluster = (brsize * bcsize - >= lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0)); - } else if (coarseningCriterion_ == "numClusters") { - removeCluster = (lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0) < tgt_clusterPairSize); - } else if (coarseningCriterion_ == "transferLevels") { - removeCluster = ((blidsToDrop.find(brlid) != blidsToDrop.end()) || - (blidsToDrop.find(bclid) != blidsToDrop.end())); - } - if (removeCluster) { - // we are dropping the cluster pair from the far field - ++dropped; - new_brow.value(k) = ZERO; - - // loop over the point matrix and add the entries to diffKernelApprox - const LocalOrdinal row_start = lcl_offsets(brlid); - const LocalOrdinal row_end = lcl_offsets(brlid+1); - const LocalOrdinal 
col_start = lcl_ghosted_offsets(bclid); - const LocalOrdinal col_end = lcl_ghosted_offsets(bclid+1); - TEUCHOS_ASSERT_EQUALITY(Teuchos::as(row_end-row_start), brsize); - TEUCHOS_ASSERT_EQUALITY(Teuchos::as(col_end-col_start), bcsize); - for (LocalOrdinal rlid = row_start; rlid < row_end; ++rlid) { - auto diff_row = lcl_diffKernelApprox.row(rlid); - auto row = lcl_KernelApprox.row(rlid); - size_t removed = 0; - for (LocalOrdinal n = 0; n < row.length; ++n) { - if ((col_start <= row.colidx(n)) && (col_end > row.colidx(n))) { - diff_row.value(n) = row.value(n); - ++removed; - } + // number of cluster pairs dropped + int dropped = 0; + // number of cluster pairs we kept + int kept = 0; + // number of cluster pairs that were no longer present + int ignored = 0; + // loop over cluster pairs + // TODO: parallel_for + auto lcl_BlockGraph = kernelApproximations_->blockA_->getLocalMatrixHost(); + auto lcl_newBlockGraph = newKernelBlockGraph->getLocalMatrixHost(); + auto lcl_KernelApprox = kernelApproximations_->pointA_->getLocalMatrixHost(); + auto lcl_diffKernelApprox = diffKernelApprox->getLocalMatrixHost(); + for (LocalOrdinal brlid = 0; brlid < lcl_BlockGraph.numRows(); ++brlid) { + size_t brsize = lcl_clusterSizes(brlid, 0); + auto brow = lcl_BlockGraph.row(brlid); + auto new_brow = lcl_newBlockGraph.row(brlid); + for (LocalOrdinal k = 0; k < brow.length; ++k) { + // Entries of the block matrix for kernelApproximations + // decide whether the cluster pair is present and only take + // values 1 or 0. 
+ if (brow.value(k) > HALF) { + LocalOrdinal bclid = brow.colidx(k); + size_t bcsize = lcl_ghosted_clusterSizes(bclid, 0); + + // criterium for removing a cluster pair from the far field + bool removeCluster = false; + if (coarseningCriterion_ == "equivalentDense") { + // Size of the sparse cluster approximation >= size of dense equivalent + removeCluster = (brsize * bcsize >= lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0)); + } else if (coarseningCriterion_ == "numClusters") { + removeCluster = (lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0) < tgt_clusterPairSize); + } else if (coarseningCriterion_ == "transferLevels") { + removeCluster = ((blidsToDrop.find(brlid) != blidsToDrop.end()) || + (blidsToDrop.find(bclid) != blidsToDrop.end())); + } + if (removeCluster) { + // we are dropping the cluster pair from the far field + ++dropped; + new_brow.value(k) = ZERO; + + // loop over the point matrix and add the entries to diffKernelApprox + const LocalOrdinal row_start = lcl_offsets(brlid); + const LocalOrdinal row_end = lcl_offsets(brlid + 1); + const LocalOrdinal col_start = lcl_ghosted_offsets(bclid); + const LocalOrdinal col_end = lcl_ghosted_offsets(bclid + 1); + TEUCHOS_ASSERT_EQUALITY(Teuchos::as(row_end - row_start), brsize); + TEUCHOS_ASSERT_EQUALITY(Teuchos::as(col_end - col_start), bcsize); + for (LocalOrdinal rlid = row_start; rlid < row_end; ++rlid) { + auto diff_row = lcl_diffKernelApprox.row(rlid); + auto row = lcl_KernelApprox.row(rlid); + size_t removed = 0; + for (LocalOrdinal n = 0; n < row.length; ++n) { + if ((col_start <= row.colidx(n)) && (col_end > row.colidx(n))) { + diff_row.value(n) = row.value(n); + ++removed; } - if (removed != bcsize) { - std::ostringstream oss; - oss << "brlid " << brlid << " row " << rlid << std::endl; - oss << "col_start "<< col_start << " col_end " << col_end << std::endl; - for (LocalOrdinal n = 0; n < row.length; ++n) { - oss << row.colidx(n) << " 
" << row.value(n) << std::endl; - } - std::cout << oss.str(); + } + if (removed != bcsize) { + std::ostringstream oss; + oss << "brlid " << brlid << " row " << rlid << std::endl; + oss << "col_start " << col_start << " col_end " << col_end << std::endl; + for (LocalOrdinal n = 0; n < row.length; ++n) { + oss << row.colidx(n) << " " << row.value(n) << std::endl; } - TEUCHOS_ASSERT_EQUALITY(removed, bcsize); + std::cout << oss.str(); } - } else { - // We are keeping the cluster pair. - ++kept; - new_brow.value(k) = brow.value(k); + TEUCHOS_ASSERT_EQUALITY(removed, bcsize); } } else { - // The cluster pair has already been dropped on the fine level. - ++ignored; + // We are keeping the cluster pair. + ++kept; new_brow.value(k) = brow.value(k); } + } else { + // The cluster pair has already been dropped on the fine level. + ++ignored; + new_brow.value(k) = brow.value(k); } } - if (debugOutput_) { - // number of cluster pairs dropped - int gbl_dropped = 0; - // number of cluster pairs we kept - int gbl_kept = 0; - // number of cluster pairs that were no longer present - int gbl_ignored = 0; - Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &dropped, &gbl_dropped); - Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &kept, &gbl_kept); - Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &ignored, &gbl_ignored); - if (comm->getRank() == 0) - std::cout << "dropped " << gbl_dropped << " kept " << gbl_kept << " ignored " << gbl_ignored << std::endl; - } } + if (debugOutput_) { + // number of cluster pairs dropped + int gbl_dropped = 0; + // number of cluster pairs we kept + int gbl_kept = 0; + // number of cluster pairs that were no longer present + int gbl_ignored = 0; + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &dropped, &gbl_dropped); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &kept, &gbl_kept); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &ignored, &gbl_ignored); + if (comm->getRank() == 0) + std::cout << "dropped " << gbl_dropped << " kept " << 
gbl_kept << " ignored " << gbl_ignored << std::endl; + } + } - newKernelBlockGraph->fillComplete(kernelApproximations_->blockA_->getDomainMap(), - kernelApproximations_->blockA_->getRangeMap()); - newKernelBlockGraph = removeSmallEntries(newKernelBlockGraph, Teuchos::ScalarTraits::eps()); - diffKernelApprox->fillComplete(clusterCoeffMap_, - clusterCoeffMap_); + newKernelBlockGraph->fillComplete(kernelApproximations_->blockA_->getDomainMap(), + kernelApproximations_->blockA_->getRangeMap()); + newKernelBlockGraph = removeSmallEntries(newKernelBlockGraph, Teuchos::ScalarTraits::eps()); + diffKernelApprox->fillComplete(clusterCoeffMap_, + clusterCoeffMap_); + + // coarse point matrix of cluster pairs + Teuchos::RCP newKernelApprox; + { + Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *kernelApproximations_->pointA_, -ONE, false, *diffKernelApprox); + newKernelApprox = removeSmallEntries(temp, Teuchos::ScalarTraits::eps()); + } - // coarse point matrix of cluster pairs - Teuchos::RCP newKernelApprox; - { - Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *kernelApproximations_->pointA_, -ONE, false, *diffKernelApprox); - newKernelApprox = removeSmallEntries(temp, Teuchos::ScalarTraits::eps()); + // construct identity on clusterCoeffMap_ + Teuchos::RCP identity = buildIdentityMatrix(clusterCoeffMap_); + + Teuchos::RCP newBlockedKernelApproximation = rcp(new blocked_matrix_type(newKernelApprox, newKernelBlockGraph, kernelApproximations_->blockMap_, kernelApproximations_->ghosted_blockMap_)); + + // select subset of transfer matrices for coarse operator + std::vector > newTransferMatrices; + { + auto comm = getComm(); + + RCP v_temp = rcp(new vec_type(newKernelBlockGraph->getDomainMap())); + RCP clusterUseCount = rcp(new vec_type(newKernelBlockGraph->getDomainMap())); + v_temp->putScalar(ONE); + clusterUseCount->putScalar(ZERO); + newKernelBlockGraph->apply(*v_temp, *clusterUseCount, Teuchos::NO_TRANS); + newKernelBlockGraph->apply(*v_temp, *clusterUseCount, 
Teuchos::TRANS, ONE, ONE); + + for (int i = Teuchos::as(transferMatrices_.size()) - 1; i >= 0; i--) { + // We drop a transfer operator T_i when + // sum(T_i * clusterUseCount) == 0 + // Since we need to use Scalar, we instead check for < 0.5 + transferMatrices_[i]->blockA_->localApply(*clusterUseCount, *v_temp, Teuchos::NO_TRANS); + Scalar gbl_use_count = v_temp->norm1(); + // if (comm->getRank() == 0) + // std::cout << "Transfer " << i << " count " << gbl_use_count << std::endl; + + if (gbl_use_count < HALF) { + // We do not keep the i-th transfer for the coarse operator. + // newBasisMatrix := newBasisMatrix * (I+transferMatrices_[i])^T + Teuchos::RCP temp2 = MatrixMatrix::add(ONE, false, *identity, ONE, false, *transferMatrices_[i]->pointA_); + RCP temp = rcp(new matrix_type(newBasisMatrix->getRowMap(), clusterCoeffMap_, 0)); + MatrixMatrix::Multiply(*newBasisMatrix, false, *temp2, true, *temp); + newBasisMatrix = temp; + } else { + // We keep the i-th transfer for the coarse operator. + newTransferMatrices.insert(newTransferMatrices.begin(), transferMatrices_[i]); + } } + } - // construct identity on clusterCoeffMap_ - Teuchos::RCP identity = buildIdentityMatrix(clusterCoeffMap_); - - Teuchos::RCP newBlockedKernelApproximation = rcp(new blocked_matrix_type(newKernelApprox, newKernelBlockGraph, kernelApproximations_->blockMap_, kernelApproximations_->ghosted_blockMap_)); + // Coarse near field + RCP newNearField; + { + // transfer = newBasisMatrix * (identity + newTransferMatrices[K-1]^T) * ... 
* (identity + newTransferMatrices[0])^T + Teuchos::RCP transfer = rcp(new matrix_type(*newBasisMatrix)); + for (int i = Teuchos::as(newTransferMatrices.size()) - 1; i >= 0; i--) { + Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *newTransferMatrices[i]->pointA_); + Teuchos::RCP temp2 = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); + MatrixMatrix::Multiply(*transfer, false, *temp, true, *temp2); + transfer = temp2; + } - // select subset of transfer matrices for coarse operator - std::vector > newTransferMatrices; + // diffFarField = transfer * diffKernelApprox * transfer^T + RCP diffFarField; { - auto comm = getComm(); - - RCP v_temp = rcp(new vec_type(newKernelBlockGraph->getDomainMap())); - RCP clusterUseCount = rcp(new vec_type(newKernelBlockGraph->getDomainMap())); - v_temp->putScalar(ONE); - clusterUseCount->putScalar(ZERO); - newKernelBlockGraph->apply(*v_temp, *clusterUseCount, Teuchos::NO_TRANS); - newKernelBlockGraph->apply(*v_temp, *clusterUseCount, Teuchos::TRANS, ONE, ONE); - - for (int i = Teuchos::as(transferMatrices_.size())-1; i>=0; i--) { - // We drop a transfer operator T_i when - // sum(T_i * clusterUseCount) == 0 - // Since we need to use Scalar, we instead check for < 0.5 - transferMatrices_[i]->blockA_->localApply(*clusterUseCount, *v_temp, Teuchos::NO_TRANS); - Scalar gbl_use_count = v_temp->norm1(); - // if (comm->getRank() == 0) - // std::cout << "Transfer " << i << " count " << gbl_use_count << std::endl; - - if (gbl_use_count < HALF) { - // We do not keep the i-th transfer for the coarse operator. 
- // newBasisMatrix := newBasisMatrix * (I+transferMatrices_[i])^T - Teuchos::RCP temp2 = MatrixMatrix::add(ONE, false, *identity, ONE, false, *transferMatrices_[i]->pointA_); - RCP temp = rcp(new matrix_type(newBasisMatrix->getRowMap(), clusterCoeffMap_, 0)); - MatrixMatrix::Multiply(*newBasisMatrix, false, *temp2, true, *temp); - newBasisMatrix = temp; - } else { - // We keep the i-th transfer for the coarse operator. - newTransferMatrices.insert(newTransferMatrices.begin(), transferMatrices_[i]); - } - } + Teuchos::RCP temp = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); + MatrixMatrix::Multiply(*transfer, false, *diffKernelApprox, false, *temp); + diffFarField = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); + MatrixMatrix::Multiply(*temp, false, *transfer, true, *diffFarField); } - // Coarse near field - RCP newNearField; + // newNearField = P^T * nearField * P + diffFarField { - // transfer = newBasisMatrix * (identity + newTransferMatrices[K-1]^T) * ... * (identity + newTransferMatrices[0])^T - Teuchos::RCP transfer = rcp(new matrix_type(*newBasisMatrix)); - for (int i = Teuchos::as(newTransferMatrices.size())-1; i>=0; i--) { - Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *newTransferMatrices[i]->pointA_); - Teuchos::RCP temp2 = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *temp, true, *temp2); - transfer = temp2; - } - - // diffFarField = transfer * diffKernelApprox * transfer^T - RCP diffFarField; - { - Teuchos::RCP temp = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *diffKernelApprox, false, *temp); - diffFarField = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); - MatrixMatrix::Multiply(*temp, false, *transfer, true, *diffFarField); - } - - // newNearField = P^T * nearField * P + diffFarField - { - RCP temp = rcp(new matrix_type(nearField_->getRowMap(), 0)); - MatrixMatrix::Multiply(*nearField_, false, 
*P, false, *temp); - RCP temp2 = rcp(new matrix_type(P->getDomainMap(), 0)); - MatrixMatrix::Multiply(*P, true, *temp, false, *temp2); - newNearField = MatrixMatrix::add(ONE, false, *temp2, ONE, false, *diffFarField); - newNearField = removeSmallEntries(newNearField, Teuchos::ScalarTraits::eps()); - } + RCP temp = rcp(new matrix_type(nearField_->getRowMap(), 0)); + MatrixMatrix::Multiply(*nearField_, false, *P, false, *temp); + RCP temp2 = rcp(new matrix_type(P->getDomainMap(), 0)); + MatrixMatrix::Multiply(*P, true, *temp, false, *temp2); + newNearField = MatrixMatrix::add(ONE, false, *temp2, ONE, false, *diffFarField); + newNearField = removeSmallEntries(newNearField, Teuchos::ScalarTraits::eps()); } - - return Teuchos::rcp(new HierarchicalOperator(newNearField, - newBlockedKernelApproximation, - newBasisMatrix, - newTransferMatrices, - params_)); } + return Teuchos::rcp(new HierarchicalOperator(newNearField, + newBlockedKernelApproximation, + newBasisMatrix, + newTransferMatrices, + params_)); +} - template - Teuchos::RCP > - HierarchicalOperator:: - toMatrix() { - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Conversion from H-matrix to CSR"))); - - using Teuchos::RCP; - using Teuchos::rcp; +template +Teuchos::RCP > +HierarchicalOperator:: + toMatrix() { + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Conversion from H-matrix to CSR"))); - const Scalar ONE = Teuchos::ScalarTraits::one(); + using Teuchos::RCP; + using Teuchos::rcp; - if (hasFarField()) { + const Scalar ONE = Teuchos::ScalarTraits::one(); - // transfer = basisMatrix_ * (identity + transferMatrices_[K-1]) * ... * (identity + transferMatrices_[0]) - RCP transfer = rcp(new matrix_type(*basisMatrix_)); + if (hasFarField()) { + // transfer = basisMatrix_ * (identity + transferMatrices_[K-1]) * ... 
* (identity + transferMatrices_[0]) + RCP transfer = rcp(new matrix_type(*basisMatrix_)); - if (hasTransferMatrices()) { - // construct identity on clusterCoeffMap_ - Teuchos::RCP identity = buildIdentityMatrix(clusterCoeffMap_); + if (hasTransferMatrices()) { + // construct identity on clusterCoeffMap_ + Teuchos::RCP identity = buildIdentityMatrix(clusterCoeffMap_); - for (int i = Teuchos::as(transferMatrices_.size())-1; i>=0; i--) { - RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *transferMatrices_[i]->pointA_); - RCP temp2 = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *temp, true, *temp2); - transfer = temp2; - } + for (int i = Teuchos::as(transferMatrices_.size()) - 1; i >= 0; i--) { + RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *transferMatrices_[i]->pointA_); + RCP temp2 = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); + MatrixMatrix::Multiply(*transfer, false, *temp, true, *temp2); + transfer = temp2; } + } - // farField = transfer * kernelApproximations_ * transfer^T - RCP temp = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *kernelApproximations_->pointA_, false, *temp); - RCP farField = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); - MatrixMatrix::Multiply(*temp, false, *transfer, true, *farField); - - // nearField_ + farField - return MatrixMatrix::add(ONE, false, *nearField_, ONE, false, *farField); - - } else + // farField = transfer * kernelApproximations_ * transfer^T + RCP temp = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); + MatrixMatrix::Multiply(*transfer, false, *kernelApproximations_->pointA_, false, *temp); + RCP farField = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); + MatrixMatrix::Multiply(*temp, false, *transfer, true, *farField); - return nearField_; + // nearField_ + farField + return MatrixMatrix::add(ONE, false, *nearField_, ONE, false, *farField); - } + } else + return 
nearField_; +} - template - void - HierarchicalOperator:: - allocateMemory(size_t numVectors) const { - if (coefficients_.is_null() || coefficients_->getNumVectors() != numVectors) { - coefficients_ = Teuchos::rcp(new mv_type(clusterCoeffMap_, numVectors)); - coefficients2_ = Teuchos::rcp(new mv_type(clusterCoeffMap_, numVectors)); - X_colmap_ = Teuchos::rcp(new mv_type(nearField_->getColMap(), numVectors)); - coefficients_colmap_ = Teuchos::rcp(new mv_type(kernelApproximations_->pointA_->getColMap(), numVectors)); - } +template +void HierarchicalOperator:: + allocateMemory(size_t numVectors) const { + if (coefficients_.is_null() || coefficients_->getNumVectors() != numVectors) { + coefficients_ = Teuchos::rcp(new mv_type(clusterCoeffMap_, numVectors)); + coefficients2_ = Teuchos::rcp(new mv_type(clusterCoeffMap_, numVectors)); + X_colmap_ = Teuchos::rcp(new mv_type(nearField_->getColMap(), numVectors)); + coefficients_colmap_ = Teuchos::rcp(new mv_type(kernelApproximations_->pointA_->getColMap(), numVectors)); } } +} // namespace Tpetra -#endif // TPETRA_HIERARCHICALOPERATOR_DEF_HPP +#endif // TPETRA_HIERARCHICALOPERATOR_DEF_HPP diff --git a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp index f511e0dfded5..f65d3a7a8bca 100644 --- a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp +++ b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp @@ -9,111 +9,110 @@ #include #include - namespace Xpetra { - template - class HierarchicalOperator : public TpetraOperator { - - public: - using tHOp = Tpetra::HierarchicalOperator; - using map_type = Xpetra::Map; - using mv_type = Xpetra::MultiVector; - using matrix_type = Xpetra::Matrix; - using blocked_matrix_type = Xpetra::TpetraBlockedMatrix; - - //! @name Constructor/Destructor - //@{ - - //! 
Constructor - HierarchicalOperator(const RCP& op) : op_(op) { } - - HierarchicalOperator(const RCP& nearField, - const RCP& kernelApproximations, - const RCP& basisMatrix, - std::vector >& transferMatrices, - const Teuchos::RCP& params=Teuchos::null); - - //! Returns the Tpetra::Map object associated with the domain of this operator. - Teuchos::RCP getDomainMap() const { - return toXpetra(op_->getDomainMap()); - } - - //! Returns the Tpetra::Map object associated with the range of this operator. - Teuchos::RCP getRangeMap() const { - return toXpetra(op_->getRangeMap()); - } - - //! \brief Computes the operator-multivector application. - /*! Loosely, performs \f$Y = \alpha \cdot A^{\textrm{mode}} \cdot X + \beta \cdot Y\f$. However, the details of operation - vary according to the values of \c alpha and \c beta. Specifically - - if beta == 0, apply() must overwrite \c Y, so that any values in \c Y (including NaNs) are ignored. - - if alpha == 0, apply() may short-circuit the operator, so that any values in \c X (including NaNs) are ignored. - */ - void apply (const mv_type& X, mv_type& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const { - op_->apply(Xpetra::toTpetra(X), Xpetra::toTpetra(Y), mode, alpha, beta); - } - - //! 
Compute a residual R = B - (*this) * X - void residual(const mv_type & X, - const mv_type & B, - mv_type& R) const { - Tpetra::Details::residual(*op_, toTpetra(X), toTpetra(B), toTpetra(R)); - } - - RCP > restrict(const RCP& P) { - using TpCrs = TpetraCrsMatrix; - using CrsWrap = CrsMatrixWrap; - return Teuchos::rcp(new HierarchicalOperator(op_->restrict(Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(P)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst()))); - } - - RCP toMatrix() { - using TpCrs = TpetraCrsMatrix; - using CrsWrap = CrsMatrixWrap; - auto tpMat = Teuchos::rcp(new TpCrs(op_->toMatrix())); - return Teuchos::rcp(new CrsWrap(Teuchos::rcp_dynamic_cast >(tpMat))); - } - - double getCompression() { - return op_->getCompression(); - } - - RCP nearFieldMatrix() { - auto tpMat = Teuchos::rcp(new TpetraCrsMatrix(op_->nearFieldMatrix())); - return Teuchos::rcp(new CrsMatrixWrap(Teuchos::rcp_dynamic_cast >(tpMat))); - } - - //! Gets the operator out - RCP > getOperator() { return op_; } - - RCP > getOperatorConst() const { return op_; } - - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { - describe(out, verbLevel, true); - } - - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel, const bool printHeader) const { - op_->describe(out, verbLevel, printHeader); - } - - bool hasFarField() const { - return op_->hasFarField(); - } - - bool denserThanDenseMatrix() const { - return op_->denserThanDenseMatrix(); - } - - private: - RCP op_; - }; - -} - -#endif // XPETRA_HIERARCHICALOPERATOR_DECL_HPP +template +class HierarchicalOperator : public TpetraOperator { + public: + using tHOp = Tpetra::HierarchicalOperator; + using map_type = Xpetra::Map; + using mv_type = Xpetra::MultiVector; + using matrix_type = Xpetra::Matrix; + using blocked_matrix_type = Xpetra::TpetraBlockedMatrix; + + //! @name Constructor/Destructor + //@{ + + //! 
Constructor + HierarchicalOperator(const RCP& op) + : op_(op) {} + + HierarchicalOperator(const RCP& nearField, + const RCP& kernelApproximations, + const RCP& basisMatrix, + std::vector >& transferMatrices, + const Teuchos::RCP& params = Teuchos::null); + + //! Returns the Tpetra::Map object associated with the domain of this operator. + Teuchos::RCP getDomainMap() const { + return toXpetra(op_->getDomainMap()); + } + + //! Returns the Tpetra::Map object associated with the range of this operator. + Teuchos::RCP getRangeMap() const { + return toXpetra(op_->getRangeMap()); + } + + //! \brief Computes the operator-multivector application. + /*! Loosely, performs \f$Y = \alpha \cdot A^{\textrm{mode}} \cdot X + \beta \cdot Y\f$. However, the details of operation + vary according to the values of \c alpha and \c beta. Specifically + - if beta == 0, apply() must overwrite \c Y, so that any values in \c Y (including NaNs) are ignored. + - if alpha == 0, apply() may short-circuit the operator, so that any values in \c X (including NaNs) are ignored. + */ + void apply(const mv_type& X, mv_type& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const { + op_->apply(Xpetra::toTpetra(X), Xpetra::toTpetra(Y), mode, alpha, beta); + } + + //! 
Compute a residual R = B - (*this) * X + void residual(const mv_type& X, + const mv_type& B, + mv_type& R) const { + Tpetra::Details::residual(*op_, toTpetra(X), toTpetra(B), toTpetra(R)); + } + + RCP > restrict(const RCP& P) { + using TpCrs = TpetraCrsMatrix; + using CrsWrap = CrsMatrixWrap; + return Teuchos::rcp(new HierarchicalOperator(op_->restrict(Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(P)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst()))); + } + + RCP toMatrix() { + using TpCrs = TpetraCrsMatrix; + using CrsWrap = CrsMatrixWrap; + auto tpMat = Teuchos::rcp(new TpCrs(op_->toMatrix())); + return Teuchos::rcp(new CrsWrap(Teuchos::rcp_dynamic_cast >(tpMat))); + } + + double getCompression() { + return op_->getCompression(); + } + + RCP nearFieldMatrix() { + auto tpMat = Teuchos::rcp(new TpetraCrsMatrix(op_->nearFieldMatrix())); + return Teuchos::rcp(new CrsMatrixWrap(Teuchos::rcp_dynamic_cast >(tpMat))); + } + + //! Gets the operator out + RCP > getOperator() { return op_; } + + RCP > getOperatorConst() const { return op_; } + + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { + describe(out, verbLevel, true); + } + + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel, const bool printHeader) const { + op_->describe(out, verbLevel, printHeader); + } + + bool hasFarField() const { + return op_->hasFarField(); + } + + bool denserThanDenseMatrix() const { + return op_->denserThanDenseMatrix(); + } + + private: + RCP op_; +}; + +} // namespace Xpetra + +#endif // XPETRA_HIERARCHICALOPERATOR_DECL_HPP diff --git a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp index ef53bf383c6b..f59c48e3738a 100644 --- a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp +++ b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp @@ -3,35 +3,35 @@ namespace Xpetra { - 
template - HierarchicalOperator:: - HierarchicalOperator(const Teuchos::RCP& nearField, - const Teuchos::RCP& kernelApproximations, - const Teuchos::RCP& basisMatrix, - std::vector >& transferMatrices, - const Teuchos::RCP& params) { - using Teuchos::RCP; - using Teuchos::rcp; - using TpCrs = TpetraCrsMatrix; - using TpGOVec = TpetraMultiVector; - using CrsWrap = CrsMatrixWrap; +template +HierarchicalOperator:: + HierarchicalOperator(const Teuchos::RCP& nearField, + const Teuchos::RCP& kernelApproximations, + const Teuchos::RCP& basisMatrix, + std::vector >& transferMatrices, + const Teuchos::RCP& params) { + using Teuchos::RCP; + using Teuchos::rcp; + using TpCrs = TpetraCrsMatrix; + using TpGOVec = TpetraMultiVector; + using CrsWrap = CrsMatrixWrap; - std::vector > tTransferMatrices; - for (size_t i = 0; igetTpetra_BlockedMatrix(); - tTransferMatrices.push_back(transferT); - } - - op_ = rcp(new tHOp(Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(nearField)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), - kernelApproximations->getTpetra_BlockedMatrix(), - Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(basisMatrix)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), - tTransferMatrices, - params)); + std::vector > tTransferMatrices; + for (size_t i = 0; i < transferMatrices.size(); i++) { + auto transferT = transferMatrices[i]->getTpetra_BlockedMatrix(); + tTransferMatrices.push_back(transferT); } + op_ = rcp(new tHOp(Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(nearField)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), + kernelApproximations->getTpetra_BlockedMatrix(), + Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(basisMatrix)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), + tTransferMatrices, + params)); } -#endif // XPETRA_HIERARCHICALOPERATOR_DEF_HPP +} // namespace Xpetra + +#endif // XPETRA_HIERARCHICALOPERATOR_DEF_HPP diff --git a/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMap.hpp 
b/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMap.hpp index c0259d922220..fca269d36458 100644 --- a/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMap.hpp +++ b/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMap.hpp @@ -8,36 +8,32 @@ #include #include - namespace Xpetra { - template - class TpetraBlockedMap { - - public: - using map_type = Xpetra::Map; - using lo_vec_type = Xpetra::Vector; - using tpetra_blocked_map_type = Tpetra::BlockedMap; - - TpetraBlockedMap(const Teuchos::RCP& pointMap, - const Teuchos::RCP& blockSizes) - { - using TpLOVec = TpetraVector; - tpBlockedMap_ = Teuchos::rcp(new tpetra_blocked_map_type(toTpetra(pointMap), - Teuchos::rcp_dynamic_cast(blockSizes)->getTpetra_Vector())); - } - - RCP getTpetra_BlockedMap() const { - return tpBlockedMap_; - } - - private: - RCP tpBlockedMap_; - - }; - -} - -#endif // XPETRA_TPETRABLOCKEDMAP_HPP +template +class TpetraBlockedMap { + public: + using map_type = Xpetra::Map; + using lo_vec_type = Xpetra::Vector; + using tpetra_blocked_map_type = Tpetra::BlockedMap; + + TpetraBlockedMap(const Teuchos::RCP& pointMap, + const Teuchos::RCP& blockSizes) { + using TpLOVec = TpetraVector; + tpBlockedMap_ = Teuchos::rcp(new tpetra_blocked_map_type(toTpetra(pointMap), + Teuchos::rcp_dynamic_cast(blockSizes)->getTpetra_Vector())); + } + + RCP getTpetra_BlockedMap() const { + return tpBlockedMap_; + } + + private: + RCP tpBlockedMap_; +}; + +} // namespace Xpetra + +#endif // XPETRA_TPETRABLOCKEDMAP_HPP diff --git a/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMatrix.hpp b/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMatrix.hpp index 85d18c207e0d..0145219ee0f3 100644 --- a/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMatrix.hpp +++ b/packages/muelu/research/caglusa/Xpetra_TpetraBlockedMatrix.hpp @@ -9,61 +9,57 @@ #include #include - namespace Xpetra { - template ::scalar_type, - class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, - class GlobalOrdinal = typename 
Tpetra::Operator::global_ordinal_type, - class Node = typename Tpetra::Operator::node_type> - class TpetraBlockedMatrix { - - public: - using matrix_type = Xpetra::Matrix; - using blocked_map_type = TpetraBlockedMap; - using tpetra_blocked_matrix_type = Tpetra::BlockedMatrix; - - TpetraBlockedMatrix(const Teuchos::RCP& pointA, - const Teuchos::RCP& blockA, - const Teuchos::RCP& blockMap, - const Teuchos::RCP& ghosted_blockMap=Teuchos::null) - { - using TpCrs = TpetraCrsMatrix; - using CrsWrap = CrsMatrixWrap; - RCP tp_ghosted_blockMap; - if (!ghosted_blockMap.is_null()) - tp_ghosted_blockMap = ghosted_blockMap->getTpetra_BlockedMap(); - blockMatrix_ = Teuchos::rcp(new tpetra_blocked_matrix_type(Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(pointA)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), - Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(blockA)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), - blockMap->getTpetra_BlockedMap(), - tp_ghosted_blockMap)); - } +template ::scalar_type, + class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, + class Node = typename Tpetra::Operator::node_type> +class TpetraBlockedMatrix { + public: + using matrix_type = Xpetra::Matrix; + using blocked_map_type = TpetraBlockedMap; + using tpetra_blocked_matrix_type = Tpetra::BlockedMatrix; - void apply(const Xpetra::MultiVector& X, - Xpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const { - blockMatrix_->apply(Xpetra::toTpetra(X), Xpetra::toTpetra(Y), mode, alpha, beta); - } + TpetraBlockedMatrix(const Teuchos::RCP& pointA, + const Teuchos::RCP& blockA, + const Teuchos::RCP& blockMap, + const Teuchos::RCP& ghosted_blockMap = Teuchos::null) { + using TpCrs = TpetraCrsMatrix; + using CrsWrap = CrsMatrixWrap; + RCP tp_ghosted_blockMap; + if 
(!ghosted_blockMap.is_null()) + tp_ghosted_blockMap = ghosted_blockMap->getTpetra_BlockedMap(); + blockMatrix_ = Teuchos::rcp(new tpetra_blocked_matrix_type(Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(pointA)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), + Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(blockA)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), + blockMap->getTpetra_BlockedMap(), + tp_ghosted_blockMap)); + } - void localApply(const Xpetra::MultiVector& X, - Xpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const { - blockMatrix_->localApply(Xpetra::toTpetra(X), Xpetra::toTpetra(Y), mode, alpha, beta); - } + void apply(const Xpetra::MultiVector& X, + Xpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const { + blockMatrix_->apply(Xpetra::toTpetra(X), Xpetra::toTpetra(Y), mode, alpha, beta); + } - Teuchos::RCP getTpetra_BlockedMatrix() const { - return blockMatrix_; - } + void localApply(const Xpetra::MultiVector& X, + Xpetra::MultiVector& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const { + blockMatrix_->localApply(Xpetra::toTpetra(X), Xpetra::toTpetra(Y), mode, alpha, beta); + } - private: - Teuchos::RCP blockMatrix_; + Teuchos::RCP getTpetra_BlockedMatrix() const { + return blockMatrix_; + } - }; + private: + Teuchos::RCP blockMatrix_; +}; -} +} // namespace Xpetra -#endif // XPETRA_TPETRABLOCKEDMATRIX_HPP +#endif // XPETRA_TPETRABLOCKEDMATRIX_HPP diff --git a/packages/muelu/research/caglusa/auxiliaryOperators.hpp b/packages/muelu/research/caglusa/auxiliaryOperators.hpp index faa61c24eab5..a8b274e8b444 100644 --- a/packages/muelu/research/caglusa/auxiliaryOperators.hpp +++ 
b/packages/muelu/research/caglusa/auxiliaryOperators.hpp @@ -19,287 +19,282 @@ #include #include - namespace MueLu { - template - RCP > - buildDistanceLaplacian(RCP >& graph, - RCP::coordinateType,LocalOrdinal,GlobalOrdinal,Node> >& coords) - { - - const Scalar ONE = Teuchos::ScalarTraits::one(); - const Scalar ZERO = Teuchos::ScalarTraits::zero(); - const LocalOrdinal INV = Teuchos::OrdinalTraits::invalid(); - auto distLapl = Xpetra::MatrixFactory::Build(graph); +template +RCP > +buildDistanceLaplacian(RCP >& graph, + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coords) { + const Scalar ONE = Teuchos::ScalarTraits::one(); + const Scalar ZERO = Teuchos::ScalarTraits::zero(); + const LocalOrdinal INV = Teuchos::OrdinalTraits::invalid(); + auto distLapl = Xpetra::MatrixFactory::Build(graph); - auto rowMap = graph->getRowMap(); - auto colMap = graph->getColMap(); - auto ghosted_coords = Xpetra::MultiVectorFactory::Build(colMap, coords->getNumVectors()); - ghosted_coords->doImport(*coords, *graph->getImporter(), Xpetra::INSERT); + auto rowMap = graph->getRowMap(); + auto colMap = graph->getColMap(); + auto ghosted_coords = Xpetra::MultiVectorFactory::Build(colMap, coords->getNumVectors()); + ghosted_coords->doImport(*coords, *graph->getImporter(), Xpetra::INSERT); - { - auto lcl_coords = coords->getHostLocalView(Xpetra::Access::ReadOnly); - auto lcl_ghosted_coords = ghosted_coords->getHostLocalView(Xpetra::Access::ReadOnly); - auto lcl_distLapl = distLapl->getLocalMatrixHost(); - - // TODO: parallel_for - for (LocalOrdinal rlid = 0; rlid < lcl_distLapl.numRows(); ++rlid) { - auto row = lcl_distLapl.row(rlid); - Scalar diag = ZERO; - LocalOrdinal diagIndex = INV; - for (LocalOrdinal k = 0; k < row.length; ++k) { - LocalOrdinal clid = row.colidx(k); - if (rowMap->getGlobalElement(rlid) == colMap->getGlobalElement(clid)) { - diagIndex = k; - } else { - Scalar dist = ZERO; - for (size_t j = 0; j < lcl_coords.extent(1); j++) { - auto s = lcl_coords(rlid,j) - 
lcl_ghosted_coords(clid,j); - dist += s*s; - } - row.value(k) = ONE/std::sqrt(dist); - diag -= row.value(k); + { + auto lcl_coords = coords->getHostLocalView(Xpetra::Access::ReadOnly); + auto lcl_ghosted_coords = ghosted_coords->getHostLocalView(Xpetra::Access::ReadOnly); + auto lcl_distLapl = distLapl->getLocalMatrixHost(); + + // TODO: parallel_for + for (LocalOrdinal rlid = 0; rlid < lcl_distLapl.numRows(); ++rlid) { + auto row = lcl_distLapl.row(rlid); + Scalar diag = ZERO; + LocalOrdinal diagIndex = INV; + for (LocalOrdinal k = 0; k < row.length; ++k) { + LocalOrdinal clid = row.colidx(k); + if (rowMap->getGlobalElement(rlid) == colMap->getGlobalElement(clid)) { + diagIndex = k; + } else { + Scalar dist = ZERO; + for (size_t j = 0; j < lcl_coords.extent(1); j++) { + auto s = lcl_coords(rlid, j) - lcl_ghosted_coords(clid, j); + dist += s * s; } + row.value(k) = ONE / std::sqrt(dist); + diag -= row.value(k); } - TEUCHOS_ASSERT(diagIndex != INV); - row.value(diagIndex) = diag; } + TEUCHOS_ASSERT(diagIndex != INV); + row.value(diagIndex) = diag; } - distLapl->fillComplete(); - return distLapl; } + distLapl->fillComplete(); + return distLapl; +} - template - RCP > - constructAuxiliaryOperator(RCP > op, - Teuchos::ParameterList& problemParams) { - - using IO = Xpetra::IO; - using IOhelpers = MueLu::IOhelpers; +template +RCP > +constructAuxiliaryOperator(RCP > op, + Teuchos::ParameterList& problemParams) { + using IO = Xpetra::IO; + using IOhelpers = MueLu::IOhelpers; - RCP > hop = rcp_dynamic_cast >(op); + RCP > hop = rcp_dynamic_cast >(op); - RCP > auxOp; + RCP > auxOp; - const std::string auxOpStr = problemParams.get("auxiliary operator"); + const std::string auxOpStr = problemParams.get("auxiliary operator"); - if ((auxOpStr == "near") || (auxOpStr == "distanceLaplacian")) { - if (hop.is_null()) - auxOp = rcp_dynamic_cast >(op, true); - else - auxOp = hop->nearFieldMatrix(); + if ((auxOpStr == "near") || (auxOpStr == "distanceLaplacian")) { + if (hop.is_null()) + 
auxOp = rcp_dynamic_cast >(op, true); + else + auxOp = hop->nearFieldMatrix(); #ifdef MUELU_HIERARCHICAL_DEBUG - // CoalesceDropFactory_kokkos assumes fitted row and column maps - Xpetra::MatrixUtils::checkLocalRowMapMatchesColMap(*auxOp); + // CoalesceDropFactory_kokkos assumes fitted row and column maps + Xpetra::MatrixUtils::checkLocalRowMapMatchesColMap(*auxOp); #endif - // coordinates - auto coords = Xpetra::IO::coordinateType,LocalOrdinal,GlobalOrdinal,Node>::ReadMultiVector(problemParams.get("coordinates"), op->getRangeMap()); + // coordinates + auto coords = Xpetra::IO::coordinateType, LocalOrdinal, GlobalOrdinal, Node>::ReadMultiVector(problemParams.get("coordinates"), op->getRangeMap()); - { - // apply dropping to auxOp - Level fineLevel; - fineLevel.SetFactoryManager(Teuchos::null); - fineLevel.SetLevelID(0); - fineLevel.Set("A",auxOp); - fineLevel.Set("Coordinates",coords); - fineLevel.Set("DofsPerNode",1); - fineLevel.setlib(auxOp->getDomainMap()->lib()); - auto amalgFact = rcp(new AmalgamationFactory()); - auto dropFact = rcp(new CoalesceDropFactory_kokkos()); - dropFact->SetFactory("UnAmalgamationInfo", amalgFact); - - double dropTol = problemParams.get("drop tolerance"); - // double dropTol = 0.1; // 1D - // double dropTol = 0.03; // 2D - std::string dropScheme = "classical"; - dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol)); - dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme)); - - fineLevel.Request("A",dropFact.get()); - fineLevel.Get("A", auxOp, dropFact.get()); - } + { + // apply dropping to auxOp + Level fineLevel; + fineLevel.SetFactoryManager(Teuchos::null); + fineLevel.SetLevelID(0); + fineLevel.Set("A", auxOp); + fineLevel.Set("Coordinates", coords); + fineLevel.Set("DofsPerNode", 1); + fineLevel.setlib(auxOp->getDomainMap()->lib()); + auto amalgFact = rcp(new AmalgamationFactory()); + auto dropFact = rcp(new CoalesceDropFactory_kokkos()); + 
dropFact->SetFactory("UnAmalgamationInfo", amalgFact); + + double dropTol = problemParams.get("drop tolerance"); + // double dropTol = 0.1; // 1D + // double dropTol = 0.03; // 2D + std::string dropScheme = "classical"; + dropFact->SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(dropTol)); + dropFact->SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(dropScheme)); + + fineLevel.Request("A", dropFact.get()); + fineLevel.Get("A", auxOp, dropFact.get()); + } - { - // filter out small entries in auxOp - Level fineLevel; - fineLevel.SetFactoryManager(Teuchos::null); - fineLevel.SetLevelID(0); - fineLevel.Set("A",auxOp); - auto filterFact = rcp(new ThresholdAFilterFactory("A", 1.0e-8, true, -1)); - fineLevel.Request("A",filterFact.get()); - filterFact->Build(fineLevel); - auxOp = fineLevel.Get< RCP > >("A",filterFact.get()); - } + { + // filter out small entries in auxOp + Level fineLevel; + fineLevel.SetFactoryManager(Teuchos::null); + fineLevel.SetLevelID(0); + fineLevel.Set("A", auxOp); + auto filterFact = rcp(new ThresholdAFilterFactory("A", 1.0e-8, true, -1)); + fineLevel.Request("A", filterFact.get()); + filterFact->Build(fineLevel); + auxOp = fineLevel.Get > >("A", filterFact.get()); + } - if (auxOpStr == "distanceLaplacian") { - // build distance Laplacian using graph of auxOp and coordinates - auto graph = auxOp->getCrsGraph(); - auxOp = buildDistanceLaplacian(graph, coords); - } + if (auxOpStr == "distanceLaplacian") { + // build distance Laplacian using graph of auxOp and coordinates + auto graph = auxOp->getCrsGraph(); + auxOp = buildDistanceLaplacian(graph, coords); + } - } else { - const bool readBinary = problemParams.get("read binary", false); - const bool readLocal = problemParams.get("read local", false); + } else { + const bool readBinary = problemParams.get("read binary", false); + const bool readLocal = problemParams.get("read local", false); + + // colmap of auxiliary operator + auto aux_colmap = 
IO::ReadMap(problemParams.get("aux colmap"), op->getRangeMap()->lib(), op->getRangeMap()->getComm(), readBinary); - // colmap of auxiliary operator - auto aux_colmap = IO::ReadMap(problemParams.get("aux colmap"), op->getRangeMap()->lib(), op->getRangeMap()->getComm(), readBinary); + auxOp = IOhelpers::Read(auxOpStr, op->getRangeMap(), aux_colmap, op->getRangeMap(), op->getRangeMap(), true, readBinary, readLocal); + } + + return auxOp; +} - auxOp = IOhelpers::Read(auxOpStr, op->getRangeMap(), aux_colmap, op->getRangeMap(), op->getRangeMap(), true, readBinary, readLocal); +template +RCP > +constructHierarchyFromAuxiliary(RCP > op, + RCP > auxH, + Teuchos::ParameterList& params, + Teuchos::FancyOStream& out) { + params.set("coarse: max size", 1); + params.set("max levels", auxH->GetNumLevels()); + + const bool implicitTranspose = params.get("transpose: use implicit", MueLu::MasterList::getDefault("transpose: use implicit")); + + auto hop = rcp_dynamic_cast >(op); + if (!hop.is_null()) + op->describe(out, Teuchos::VERB_EXTREME); + + RCP > H = rcp(new Hierarchy()); + RCP lvl = H->GetLevel(0); + lvl->Set("A", op); + // lvl->Set("Coordinates", coords); + for (int lvlNo = 1; lvlNo < auxH->GetNumLevels(); lvlNo++) { + RCP fineLvl = H->GetLevel(lvlNo - 1); + H->AddNewLevel(); + lvl = H->GetLevel(lvlNo); + RCP auxLvl = auxH->GetLevel(lvlNo); + // auto mgr = auxLvl->GetFactoryManager(); + // auxLvl->print(std::cout, MueLu::Debug); + + RCP > P = auxLvl->Get > >("P"); + RCP > fineAOp = fineLvl->Get > >("A"); + lvl->Set("P", P); + params.sublist("level " + std::to_string(lvlNo)).set("P", P); + + if (!implicitTranspose) { + TEUCHOS_ASSERT(auxLvl->IsAvailable("R")); + RCP > R = auxLvl->Get > >("R"); + lvl->Set("R", R); + params.sublist("level " + std::to_string(lvlNo)).set("R", R); } - return auxOp; - } + auto fineA = rcp_dynamic_cast >(fineAOp); + if (!fineA.is_null()) { + auto coarseA = fineA->restrict(P); - template - RCP > - constructHierarchyFromAuxiliary(RCP > op, - RCP > 
auxH, - Teuchos::ParameterList& params, - Teuchos::FancyOStream& out) { - - params.set("coarse: max size", 1); - params.set("max levels", auxH->GetNumLevels()); - - const bool implicitTranspose = params.get("transpose: use implicit", MueLu::MasterList::getDefault("transpose: use implicit")); - - auto hop = rcp_dynamic_cast >(op); - if (!hop.is_null()) - op->describe(out, Teuchos::VERB_EXTREME); - - RCP > H = rcp(new Hierarchy()); - RCP lvl = H->GetLevel(0); - lvl->Set("A", op); - // lvl->Set("Coordinates", coords); - for(int lvlNo = 1; lvlNo < auxH->GetNumLevels(); lvlNo++) { - RCP fineLvl = H->GetLevel(lvlNo-1); - H->AddNewLevel(); - lvl = H->GetLevel(lvlNo); - RCP auxLvl = auxH->GetLevel(lvlNo); - // auto mgr = auxLvl->GetFactoryManager(); - // auxLvl->print(std::cout, MueLu::Debug); - - RCP > P = auxLvl->Get > >("P"); - RCP > fineAOp = fineLvl->Get > >("A"); - lvl->Set("P", P); - params.sublist("level "+std::to_string(lvlNo)).set("P", P); - - if (!implicitTranspose) { - TEUCHOS_ASSERT(auxLvl->IsAvailable("R")); - RCP > R = auxLvl->Get > >("R"); - lvl->Set("R", R); - params.sublist("level "+std::to_string(lvlNo)).set("R", R); +#ifdef MUELU_HIERARCHICAL_DEBUG + { + // Test that the Galerkin product worked + using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + const Scalar one = Teuchos::ScalarTraits::one(); + const MagnitudeType tol = 10000 * Teuchos::ScalarTraits::eps(); + auto testLHS = MultiVectorFactory::Build(coarseA->getDomainMap(), 1); + auto testRHS_HOp_coarse = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); + auto testRHS_HOp_fine = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); + auto temp1 = MultiVectorFactory::Build(fineA->getDomainMap(), 1); + auto temp2 = MultiVectorFactory::Build(fineA->getRangeMap(), 1); + testLHS->putScalar(one); + coarseA->apply(*testLHS, *testRHS_HOp_coarse); + P->apply(*testLHS, *temp1); + fineA->apply(*temp1, *temp2); + P->apply(*temp2, *testRHS_HOp_fine, Teuchos::TRANS); + 
testRHS_HOp_fine->update(one, *testRHS_HOp_coarse, -one); + auto norm = testRHS_HOp_fine->getVector(0)->norm2(); + out << "|P^T*op_fine*P*1 - op_H_coarse*1| = " << norm << std::endl; + TEUCHOS_ASSERT(norm < tol); } +#endif + + if ((lvlNo + 1 == auxH->GetNumLevels()) || !coarseA->hasFarField() || coarseA->denserThanDenseMatrix()) { + // coarseA->describe(out, Teuchos::VERB_EXTREME); - auto fineA = rcp_dynamic_cast >(fineAOp); - if (!fineA.is_null()) { - auto coarseA = fineA->restrict(P); + auto matA = coarseA->toMatrix(); #ifdef MUELU_HIERARCHICAL_DEBUG { - // Test that the Galerkin product worked - using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - const Scalar one = Teuchos::ScalarTraits::one(); - const MagnitudeType tol = 10000*Teuchos::ScalarTraits::eps(); + // test that the conversion to Crs format worked + using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + const Scalar one = Teuchos::ScalarTraits::one(); + const MagnitudeType tol = 10000 * Teuchos::ScalarTraits::eps(); auto testLHS = MultiVectorFactory::Build(coarseA->getDomainMap(), 1); - auto testRHS_HOp_coarse = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); - auto testRHS_HOp_fine = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); - auto temp1 = MultiVectorFactory::Build(fineA->getDomainMap(), 1); - auto temp2 = MultiVectorFactory::Build(fineA->getRangeMap(), 1); + auto testRHS_HOp = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); + auto testRHS_dense = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); testLHS->putScalar(one); - coarseA->apply(*testLHS, *testRHS_HOp_coarse); - P->apply(*testLHS, *temp1); - fineA->apply(*temp1, *temp2); - P->apply(*temp2, *testRHS_HOp_fine, Teuchos::TRANS); - testRHS_HOp_fine->update(one, *testRHS_HOp_coarse, -one); - auto norm = testRHS_HOp_fine->getVector(0)->norm2(); - out << "|P^T*op_fine*P*1 - op_H_coarse*1| = " << norm << std::endl; + coarseA->apply(*testLHS, *testRHS_HOp); + matA->apply(*testLHS, 
*testRHS_dense); + testRHS_dense->update(one, *testRHS_HOp, -one); + auto norm = testRHS_dense->getVector(0)->norm2(); + out << "|op_dense*1 - op_H*1| = " << norm << std::endl; TEUCHOS_ASSERT(norm < tol); } #endif - if ((lvlNo+1 == auxH->GetNumLevels()) || !coarseA->hasFarField() || coarseA->denserThanDenseMatrix()) { - // coarseA->describe(out, Teuchos::VERB_EXTREME); - - auto matA = coarseA->toMatrix(); - -#ifdef MUELU_HIERARCHICAL_DEBUG - { - // test that the conversion to Crs format worked - using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - const Scalar one = Teuchos::ScalarTraits::one(); - const MagnitudeType tol = 10000*Teuchos::ScalarTraits::eps(); - auto testLHS = MultiVectorFactory::Build(coarseA->getDomainMap(), 1); - auto testRHS_HOp = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); - auto testRHS_dense = MultiVectorFactory::Build(coarseA->getRangeMap(), 1); - testLHS->putScalar(one); - coarseA->apply(*testLHS, *testRHS_HOp); - matA->apply(*testLHS, *testRHS_dense); - testRHS_dense->update(one, *testRHS_HOp, -one); - auto norm = testRHS_dense->getVector(0)->norm2(); - out << "|op_dense*1 - op_H*1| = " << norm << std::endl; - TEUCHOS_ASSERT(norm < tol); - } -#endif - - using std::setw; - using std::endl; - const size_t numRows = matA->getRowMap()->getGlobalNumElements(); - const size_t nnz = matA->getGlobalNumEntries(); - const double nnzPerRow = Teuchos::as(nnz)/numRows; - std::ostringstream oss; - oss << std::left; - // oss << setw(9) << "rows" << setw(12) << "nnz" << setw(14) << "nnz/row" << setw(12) << endl; - oss << setw(9) << numRows << setw(12) << nnz << setw(14) << nnzPerRow << endl; - out << oss.str(); - - lvl->Set("A", matA); - } else { - coarseA->describe(out, Teuchos::VERB_EXTREME, /*printHeader=*/false); - lvl->Set("A", rcp_dynamic_cast >(coarseA)); - } - } else { - // classical RAP - auto fineAmat = rcp_dynamic_cast >(fineAOp, true); - Level fineLevel, coarseLevel; - fineLevel.SetFactoryManager(Teuchos::null); - 
coarseLevel.SetFactoryManager(Teuchos::null); - coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel)); - fineLevel.SetLevelID(0); - coarseLevel.SetLevelID(1); - fineLevel.Set("A", fineAmat); - coarseLevel.Set("P", P); - RCP > rapFact = rcp(new RAPFactory()); - Teuchos::ParameterList rapList = *(rapFact->GetValidParameterList()); - rapList.set("transpose: use implicit", true); - rapFact->SetParameterList(rapList); - coarseLevel.Request("A", rapFact.get()); - RCP > matA = coarseLevel.Get > >("A", rapFact.get()); - - using std::setw; using std::endl; - const size_t numRows = matA->getRowMap()->getGlobalNumElements(); - const size_t nnz = matA->getGlobalNumEntries(); - const double nnzPerRow = Teuchos::as(nnz)/numRows; + using std::setw; + const size_t numRows = matA->getRowMap()->getGlobalNumElements(); + const size_t nnz = matA->getGlobalNumEntries(); + const double nnzPerRow = Teuchos::as(nnz) / numRows; std::ostringstream oss; oss << std::left; - oss << setw(9) << "rows" << setw(12) << "nnz" << setw(14) << "nnz/row" << setw(12) << endl; + // oss << setw(9) << "rows" << setw(12) << "nnz" << setw(14) << "nnz/row" << setw(12) << endl; oss << setw(9) << numRows << setw(12) << nnz << setw(14) << nnzPerRow << endl; out << oss.str(); lvl->Set("A", matA); + } else { + coarseA->describe(out, Teuchos::VERB_EXTREME, /*printHeader=*/false); + lvl->Set("A", rcp_dynamic_cast >(coarseA)); } + } else { + // classical RAP + auto fineAmat = rcp_dynamic_cast >(fineAOp, true); + Level fineLevel, coarseLevel; + fineLevel.SetFactoryManager(Teuchos::null); + coarseLevel.SetFactoryManager(Teuchos::null); + coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel)); + fineLevel.SetLevelID(0); + coarseLevel.SetLevelID(1); + fineLevel.Set("A", fineAmat); + coarseLevel.Set("P", P); + RCP > rapFact = rcp(new RAPFactory()); + Teuchos::ParameterList rapList = *(rapFact->GetValidParameterList()); + rapList.set("transpose: use implicit", true); + rapFact->SetParameterList(rapList); + 
coarseLevel.Request("A", rapFact.get()); + RCP > matA = coarseLevel.Get > >("A", rapFact.get()); + + using std::endl; + using std::setw; + const size_t numRows = matA->getRowMap()->getGlobalNumElements(); + const size_t nnz = matA->getGlobalNumEntries(); + const double nnzPerRow = Teuchos::as(nnz) / numRows; + std::ostringstream oss; + oss << std::left; + oss << setw(9) << "rows" << setw(12) << "nnz" << setw(14) << "nnz/row" << setw(12) << endl; + oss << setw(9) << numRows << setw(12) << nnz << setw(14) << nnzPerRow << endl; + out << oss.str(); + + lvl->Set("A", matA); } - - RCP > mueLuFactory = rcp(new ParameterListInterpreter(params,op->getDomainMap()->getComm())); - H->setlib(op->getDomainMap()->lib()); - H->SetProcRankVerbose(op->getDomainMap()->getComm()->getRank()); - mueLuFactory->SetupHierarchy(*H); - H->IsPreconditioner(true); - - return H; } + RCP > mueLuFactory = rcp(new ParameterListInterpreter(params, op->getDomainMap()->getComm())); + H->setlib(op->getDomainMap()->lib()); + H->SetProcRankVerbose(op->getDomainMap()->getComm()->getRank()); + mueLuFactory->SetupHierarchy(*H); + H->IsPreconditioner(true); + + return H; } +} // namespace MueLu + #endif diff --git a/packages/muelu/research/caglusa/main.cpp b/packages/muelu/research/caglusa/main.cpp index e0e2e607b551..0de4ac0a8ef6 100644 --- a/packages/muelu/research/caglusa/main.cpp +++ b/packages/muelu/research/caglusa/main.cpp @@ -68,81 +68,89 @@ using Teuchos::RCP; using Teuchos::rcp; - -template +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { - #include "MueLu_UseShortNames.hpp" - - std::string xmlHierarchical = "1d-binary/hierarchical.xml"; clp.setOption("xmlHierarchical", &xmlHierarchical, "XML describing the hierarchical operator"); - std::string xmlProblem = "1d-binary/problem.xml"; clp.setOption("xmlProblem", &xmlProblem, "XML describing the problem"); - std::string xmlBelos = "belos.xml"; clp.setOption("xmlBelos", &xmlBelos, "XML with 
Belos parameters"); - std::string xmlMueLu = "muelu.xml"; clp.setOption("xmlMueLu", &xmlMueLu, "XML with MueLu parameters"); - std::string xmlAuxHierarchy = "auxiliary.xml"; clp.setOption("xmlAux", &xmlAuxHierarchy, "XML with MueLu parameters for the auxiliary hierarchy"); - bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); - bool doTests = true; clp.setOption("tests", "notests", &doTests, "Test operator using known LHS & RHS."); - bool doUnPrecSolve = true; clp.setOption("unPrec", "noUnPrec", &doUnPrecSolve, "Solve unpreconditioned"); - bool doPrecSolve = true; clp.setOption("prec", "noPrec", &doPrecSolve, "Solve preconditioned with AMG"); +#include "MueLu_UseShortNames.hpp" + + std::string xmlHierarchical = "1d-binary/hierarchical.xml"; + clp.setOption("xmlHierarchical", &xmlHierarchical, "XML describing the hierarchical operator"); + std::string xmlProblem = "1d-binary/problem.xml"; + clp.setOption("xmlProblem", &xmlProblem, "XML describing the problem"); + std::string xmlBelos = "belos.xml"; + clp.setOption("xmlBelos", &xmlBelos, "XML with Belos parameters"); + std::string xmlMueLu = "muelu.xml"; + clp.setOption("xmlMueLu", &xmlMueLu, "XML with MueLu parameters"); + std::string xmlAuxHierarchy = "auxiliary.xml"; + clp.setOption("xmlAux", &xmlAuxHierarchy, "XML with MueLu parameters for the auxiliary hierarchy"); + bool printTimings = true; + clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); + bool doTests = true; + clp.setOption("tests", "notests", &doTests, "Test operator using known LHS & RHS."); + bool doUnPrecSolve = true; + clp.setOption("unPrec", "noUnPrec", &doUnPrecSolve, "Solve unpreconditioned"); + bool doPrecSolve = true; + clp.setOption("prec", "noPrec", &doPrecSolve, "Solve preconditioned with AMG"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + case 
Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); Teuchos::RCP stacked_timer = rcp(new Teuchos::StackedTimer("Hierarchical Driver")); - Teuchos::RCP verbose_out = Teuchos::rcp(new Teuchos::FancyOStream(Teuchos::rcpFromRef(std::cout))); + Teuchos::RCP verbose_out = Teuchos::rcp(new Teuchos::FancyOStream(Teuchos::rcpFromRef(std::cout))); verbose_out->setShowProcRank(true); stacked_timer->setVerboseOstream(verbose_out); Teuchos::TimeMonitor::setStackedTimer(stacked_timer); - using HOp = Xpetra::HierarchicalOperator; - using op_type = Xpetra::Operator; + using HOp = Xpetra::HierarchicalOperator; + using op_type = Xpetra::Operator; using blocked_matrix_type = typename HOp::blocked_matrix_type; - using blocked_map_type = typename blocked_matrix_type::blocked_map_type; - using matrix_type = typename HOp::matrix_type; - using map_type = typename HOp::map_type; - using mv_type = typename HOp::mv_type; - using lo_vec_type = typename blocked_map_type::lo_vec_type; - using coord_mv = Xpetra::MultiVector::coordinateType,LocalOrdinal,GlobalOrdinal,Node>; - using MagnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - using IO = Xpetra::IO; - using IOhelpers = MueLu::IOhelpers; + using blocked_map_type = typename blocked_matrix_type::blocked_map_type; + using matrix_type = typename HOp::matrix_type; + using map_type = typename HOp::map_type; + using mv_type = typename HOp::mv_type; + using lo_vec_type = typename blocked_map_type::lo_vec_type; + using coord_mv = Xpetra::MultiVector::coordinateType, LocalOrdinal, GlobalOrdinal, Node>; + using MagnitudeType = typename 
Teuchos::ScalarTraits::magnitudeType; + using IO = Xpetra::IO; + using IOhelpers = MueLu::IOhelpers; RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream &out = *fancy; out.setOutputToRootOnly(0); - bool success = true; - const Scalar one = Teuchos::ScalarTraits::one(); - const Scalar zero = Teuchos::ScalarTraits::zero(); - const MagnitudeType tol = 100000*Teuchos::ScalarTraits::eps(); + bool success = true; + const Scalar one = Teuchos::ScalarTraits::one(); + const Scalar zero = Teuchos::ScalarTraits::zero(); + const MagnitudeType tol = 100000 * Teuchos::ScalarTraits::eps(); RCP op; RCP hop; { Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Read hierarchical matrix"))); - op = IOhelpers::Read(xmlHierarchical, comm); + op = IOhelpers::Read(xmlHierarchical, comm); hop = Teuchos::rcp_dynamic_cast(op); } if (!hop.is_null()) - out << "Compression: " << hop->getCompression() << " of dense matrix."<< std::endl; + out << "Compression: " << hop->getCompression() << " of dense matrix." 
<< std::endl; Teuchos::ParameterList problemParams; Teuchos::updateParametersFromXmlFileAndBroadcast(xmlProblem, Teuchos::Ptr(&problemParams), *comm); RCP map = op->getDomainMap(); - RCP auxOp, mass; - RCP X_ex, RHS, X; - RCP coords; + RCP auxOp, mass; + RCP X_ex, RHS, X; + RCP coords; { // Read in auxiliary stuff // coordinates - coords = Xpetra::IO::coordinateType,LocalOrdinal,GlobalOrdinal,Node>::ReadMultiVector(problemParams.get("coordinates"), map); + coords = Xpetra::IO::coordinateType, LocalOrdinal, GlobalOrdinal, Node>::ReadMultiVector(problemParams.get("coordinates"), map); // Auxiliary matrix used for multigrid construction { @@ -154,7 +162,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Mass matrix for L2 error computation { const bool readBinary = problemParams.get("read binary", false); - const bool readLocal = problemParams.get("read local", false); + const bool readLocal = problemParams.get("read local", false); // colmap of auxiliary operator RCP aux_colmap = IO::ReadMap(problemParams.get("aux colmap"), lib, comm, readBinary); @@ -165,8 +173,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg X_ex = IO::ReadMultiVector(problemParams.get("exact solution"), map); RHS = IO::ReadMultiVector(problemParams.get("right-hand side"), map); // solution vector - X = MultiVectorFactory::Build(map, 1); - + X = MultiVectorFactory::Build(map, 1); } if (doTests) { @@ -222,11 +229,11 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg using OP = Belos::OperatorT; X->putScalar(zero); - RCP belosOp = rcp(new Belos::XpetraOp(op)); + RCP belosOp = rcp(new Belos::XpetraOp(op)); RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, RHS)); std::string belosType = "Pseudoblock CG"; - auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); + auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); bool set = belosProblem->setProblem(); if 
(set == false) { @@ -235,24 +242,24 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Create an iterative solver manager Belos::SolverFactory solverFactory; - RCP< Belos::SolverManager > solver = solverFactory.create(belosType, belosSolverList); + RCP > solver = solverFactory.create(belosType, belosSolverList); solver->setProblem(belosProblem); // Perform solve Belos::ReturnType ret = solver->solve(); - int numIts = solver->getNumIters(); + int numIts = solver->getNumIters(); // Get the number of iterations for this solve. out << "Number of iterations performed for this solve: " << numIts << std::endl; // Xpetra::IO::Write("X.mtx", *X); X->update(one, *X_ex, -one); - out << "|X-X_ex| = " << X->getVector(0)->norm2() << std::endl << std::endl; + out << "|X-X_ex| = " << X->getVector(0)->norm2() << std::endl + << std::endl; success &= (ret == Belos::Converged); - } -#endif // HAVE_MUELU_BELOS +#endif // HAVE_MUELU_BELOS if (doPrecSolve) { // Solve linear system using a AMG preconditioned Krylov method @@ -273,8 +280,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg auxParams.set("hierarchy label", "Auxiliary"); auxParams.sublist("user data").set("Coordinates", coords); // No rebalancing yet. 
- auxParams.set("coarse: max size", std::max(auxParams.get("coarse: max size", 2*comm->getSize()), - 2*comm->getSize())); + auxParams.set("coarse: max size", std::max(auxParams.get("coarse: max size", 2 * comm->getSize()), + 2 * comm->getSize())); auxH = MueLu::CreateXpetraPreconditioner(auxOp, auxParams); } @@ -295,7 +302,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg H = MueLu::constructHierarchyFromAuxiliary(Teuchos::rcp_dynamic_cast(op, true), auxH, params, out); } - #ifdef HAVE_MUELU_BELOS { //////////////////////////////////////////////////////////////// @@ -307,12 +313,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg using OP = Belos::OperatorT; X->putScalar(zero); - RCP belosOp = rcp(new Belos::XpetraOp(op)); - RCP belosPrec = rcp(new Belos::MueLuOp (H)); + RCP belosOp = rcp(new Belos::XpetraOp(op)); + RCP belosPrec = rcp(new Belos::MueLuOp(H)); RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, RHS)); std::string belosType = "Pseudoblock CG"; - auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); + auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); belosProblem->setRightPrec(belosPrec); @@ -323,12 +329,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Create an iterative solver manager Belos::SolverFactory solverFactory; - RCP< Belos::SolverManager > solver = solverFactory.create(belosType, belosSolverList); + RCP > solver = solverFactory.create(belosType, belosSolverList); solver->setProblem(belosProblem); // Perform solve Belos::ReturnType ret = solver->solve(); - int numIts = solver->getNumIters(); + int numIts = solver->getNumIters(); // Get the number of iterations for this solve. 
out << "Number of iterations performed for this solve: " << numIts << std::endl; @@ -343,8 +349,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg success &= (ret == Belos::Converged); } -#endif // HAVE_MUELU_BELOS - +#endif // HAVE_MUELU_BELOS } stacked_timer->stop("Hierarchical Driver"); @@ -353,13 +358,13 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg if (printTimings) stacked_timer->report(out, comm, options); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); -} //main + return (success ? EXIT_SUCCESS : EXIT_FAILURE); +} // main //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/research/graham/mf_example_01.cpp b/packages/muelu/research/graham/mf_example_01.cpp index a689dae838e1..d06286f63d92 100644 --- a/packages/muelu/research/graham/mf_example_01.cpp +++ b/packages/muelu/research/graham/mf_example_01.cpp @@ -44,7 +44,7 @@ // // @HEADER -// Matrix-free example 01: Xpetra operator that generates +// Matrix-free example 01: Xpetra operator that generates // a simple tridiagonal finite difference poisson matrix // // This solves the problem f''(x) = h^2*(-pi^2)*sin(pi*x) @@ -52,7 +52,6 @@ // This supplies n global DOFs, and breaks it into // contiguous segments depending on the number of MPI ranks. 
- // STL includes #include #include @@ -73,7 +72,7 @@ #include "Kokkos_Core.hpp" //#include "kokkosTools.hpp" -//Tpetra includes +// Tpetra includes #include "Tpetra_Map.hpp" #include "Tpetra_MultiVector.hpp" #include "Tpetra_CrsMatrix.hpp" @@ -81,7 +80,7 @@ #include "Tpetra_Export.hpp" #include "MatrixMarket_Tpetra.hpp" -//Xpetra includes +// Xpetra includes #include "Xpetra_TpetraMultiVector.hpp" #include "Xpetra_Map.hpp" #include "Xpetra_MultiVector.hpp" @@ -110,8 +109,8 @@ #include "BelosLinearProblem.hpp" #include "BelosBlockCGSolMgr.hpp" #include "BelosBlockGmresSolMgr.hpp" -#include "BelosXpetraAdapter.hpp" // this header defines Belos::XpetraOp() -#include "BelosMueLuAdapter.hpp" // this header defines Belos::MueLuOp() +#include "BelosXpetraAdapter.hpp" // this header defines Belos::XpetraOp() +#include "BelosMueLuAdapter.hpp" // this header defines Belos::MueLuOp() #include "BelosTpetraAdapter.hpp" #include "BelosTpetraOperator.hpp" #include "BelosBlockGmresSolMgr.hpp" @@ -128,67 +127,67 @@ #endif // some things like multivectors are "2D views" but only appear as 1D in practice, so we macro a print statement -#define PRINT_VIEW2_LINEAR(view) \ -std::cout << #view << " (" << view.extent(0) << "," << view.extent(1) << ") = [" << std::endl; \ -for(unsigned int i=0; i class TridiagonalOperator : public Xpetra::Operator { -public: + public: // Xpetra::Operator subclasses should always define typedefs according to Xpetra - typedef typename Xpetra::Operator::node_type node_type; - typedef typename Xpetra::MultiVector MV; - typedef typename Xpetra::Map map_type; - typedef typename Xpetra::Import import_type; - typedef typename Xpetra::Export export_type; -public: + typedef typename Xpetra::Operator::node_type node_type; + typedef typename Xpetra::MultiVector MV; + typedef typename Xpetra::Map map_type; + typedef typename Xpetra::Import import_type; + typedef typename Xpetra::Export export_type; + + public: /** Constructor * \param[in] n The number of global DOFs * 
\param[in] comm The Teuchos::Comm for the object */ TridiagonalOperator(const GlobalOrdinal n, - const Teuchos::RCP > comm) - { + const Teuchos::RCP> comm) { TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), std::invalid_argument, "TridiagonalOperator constructor: The input Comm object must be nonnull."); - - const int my_rank = comm->getRank(); + + const int my_rank = comm->getRank(); const int num_procs = comm->getSize(); // Construct a default map and let it choose how DOFs are divided // Note: This assumes the map constructor is generating contiguous local DOFs const GlobalOrdinal index_base = 0; - opMap_ = Xpetra::MapFactory::Build(Xpetra::UseTpetra, n, index_base, comm); - LocalOrdinal nlocal = opMap_->getLocalNumElements(); + opMap_ = Xpetra::MapFactory::Build(Xpetra::UseTpetra, n, index_base, comm); + LocalOrdinal nlocal = opMap_->getLocalNumElements(); // Ghosting: procs 0,1,...,n_p-1 are ordered left to right on [0,1] - if(my_rank > 0) + if (my_rank > 0) ++nlocal; - if(my_rank < num_procs - 1) + if (my_rank < num_procs - 1) ++nlocal; - // Construct a list of columns where this process has nonzero elements + // Construct a list of columns where this process has nonzero elements // For this tridiagonal matrix, this is firstRowItOwns-1:lastRowItOwns+1 std::vector indices; indices.reserve(nlocal); - if(my_rank > 0) + if (my_rank > 0) indices.push_back(opMap_->getMinGlobalIndex() - 1); - for(GlobalOrdinal i = opMap_->getMinGlobalIndex(); i <= opMap_->getMaxGlobalIndex(); ++i) + for (GlobalOrdinal i = opMap_->getMinGlobalIndex(); i <= opMap_->getMaxGlobalIndex(); ++i) indices.push_back(i); - if(my_rank < num_procs - 1) + if (my_rank < num_procs - 1) indices.push_back(opMap_->getMaxGlobalIndex() + 1); Teuchos::ArrayView element_list(indices); // column Map for handling the redistribution - const GlobalOrdinal num_global_elements = n + 2*(num_procs - 1); - redistMap_ = Xpetra::MapFactory::Build(Xpetra::UseTpetra, num_global_elements, element_list, index_base, comm); + 
const GlobalOrdinal num_global_elements = n + 2 * (num_procs - 1); + redistMap_ = Xpetra::MapFactory::Build(Xpetra::UseTpetra, num_global_elements, element_list, index_base, comm); // import object that describes how data will be redistributed importer_ = Xpetra::ImportFactory::Build(opMap_, redistMap_); @@ -197,7 +196,7 @@ class TridiagonalOperator : public Xpetra::Operator& X, Xpetra::MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const - { + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const { // Setup: get comms, ranks, procs - Teuchos::RCP > comm = opMap_->getComm(); - const int my_rank = comm->getRank(); - const int num_procs = comm->getSize(); - const size_t num_vecs = X.getNumVectors(); - const LocalOrdinal numlocrows = static_cast(X.getLocalLength()); - + Teuchos::RCP> comm = opMap_->getComm(); + const int my_rank = comm->getRank(); + const int num_procs = comm->getSize(); + const size_t num_vecs = X.getNumVectors(); + const LocalOrdinal numlocrows = static_cast(X.getLocalLength()); + // Make a temporary multivector for holding the redistributed data and then redistribute Teuchos::RCP> redistDataX = Xpetra::MultiVectorFactory::Build(redistMap_, num_vecs); redistDataX->doImport(X, *importer_, Xpetra::INSERT); @@ -230,42 +228,41 @@ class TridiagonalOperator : public Xpetra::Operator 0) { - KokkosViewY(0,c) = beta*KokkosViewY(0, c) + alpha*(-KokkosViewX(0, c) + 2*KokkosViewX(1, c) - KokkosViewX(2, c)); - offset = 0; + if (my_rank > 0) { + KokkosViewY(0, c) = beta * KokkosViewY(0, c) + alpha * (-KokkosViewX(0, c) + 2 * KokkosViewX(1, c) - KokkosViewX(2, c)); + offset = 0; } // On rank 0, we only have two entries in the first row // Y[0,c] = beta*Y[0,c] + alpha*(2*colViewX[1] - colViewX[2]) else { - KokkosViewY(0,c) = beta*KokkosViewY(0, c) + alpha*(2*KokkosViewX(0, c) - KokkosViewX(1, c)); - offset = 1; + 
KokkosViewY(0, c) = beta * KokkosViewY(0, c) + alpha * (2 * KokkosViewX(0, c) - KokkosViewX(1, c)); + offset = 1; } // For all other rows, we need the full stencil // Y[r,c] = beta*Y[r,c] + alpha*(-colViewX[r-offset] + 2*colViewX[r+1-offset] - colViewX[r+2-offset]) - for(LocalOrdinal r = 1; r < numlocrows - 1; ++r) { - const Scalar newVal = beta*KokkosViewY(r, c) + - alpha*(-KokkosViewX(r-offset, c) + 2*KokkosViewX(r+1-offset, c) - KokkosViewX(r+2-offset, c)); - KokkosViewY(r,c) = newVal; + for (LocalOrdinal r = 1; r < numlocrows - 1; ++r) { + const Scalar newVal = beta * KokkosViewY(r, c) + + alpha * (-KokkosViewX(r - offset, c) + 2 * KokkosViewX(r + 1 - offset, c) - KokkosViewX(r + 2 - offset, c)); + KokkosViewY(r, c) = newVal; } // On ranks other than the last rank, we need the ghosted X values for the computation // Y[numlocrows-1,c] = beta*Y[numlocrows-1,c] + alpha*(-colViewX[numlocrows-1-offset] + 2*colViewX[numlocrows-offset] // - colViewX[numlocrows+1-offset]) - if(my_rank < num_procs - 1) { - const Scalar newVal = beta*KokkosViewY(numlocrows-1, c) + - alpha*(-KokkosViewX(numlocrows-1-offset, c) + 2*KokkosViewX(numlocrows-offset, c) - - KokkosViewX(numlocrows+1-offset, c)); - KokkosViewY(numlocrows-1,c) = newVal; + if (my_rank < num_procs - 1) { + const Scalar newVal = beta * KokkosViewY(numlocrows - 1, c) + + alpha * (-KokkosViewX(numlocrows - 1 - offset, c) + 2 * KokkosViewX(numlocrows - offset, c) - KokkosViewX(numlocrows + 1 - offset, c)); + KokkosViewY(numlocrows - 1, c) = newVal; } // On the last rank, we only have two entries in the last row // Y[numlocrows-1,c] = beta*Y[numlocrows-1,c] + alpha*(-colViewX[numlocrows-1-offset] + 2*colViewX[numlocrows-offset]) else { - const Scalar newVal = beta*KokkosViewY(numlocrows-1, c) + - alpha*(-KokkosViewX(numlocrows-1-offset, c) + 2*KokkosViewX(numlocrows-offset, c)); - KokkosViewY(numlocrows-1,c) = newVal; + const Scalar newVal = beta * KokkosViewY(numlocrows - 1, c) + + alpha * (-KokkosViewX(numlocrows - 1 - 
offset, c) + 2 * KokkosViewX(numlocrows - offset, c)); + KokkosViewY(numlocrows - 1, c) = newVal; } } } @@ -280,46 +277,45 @@ class TridiagonalOperator : public Xpetra::Operator & X, - const Xpetra::MultiVector & B, - Xpetra::MultiVector & R) const { + void residual(const Xpetra::MultiVector& X, + const Xpetra::MultiVector& B, + Xpetra::MultiVector& R) const { typedef Teuchos::ScalarTraits STS; - R.update(STS::one(), B, STS::zero()); // R = 1*B + 0*R - this->apply(X, R, Teuchos::NO_TRANS, -STS::one(), STS::one()); // R = R - Op*X + R.update(STS::one(), B, STS::zero()); // R = 1*B + 0*R + this->apply(X, R, Teuchos::NO_TRANS, -STS::one(), STS::one()); // R = R - Op*X } -private: + private: Teuchos::RCP opMap_, redistMap_; Teuchos::RCP importer_; }; - /** - * This class defines an operator corresponding to the + * This class defines an operator corresponding to the * [1 2 1] interpolation stencil. For interpolation to * behave correctly, each MPI rank should have a - * number of DOFs that is divisible by 3, as the + * number of DOFs that is divisible by 3, as the * coarsening ratio is 3->1. Communication is ignored. 
*/ template class MFProlongatorOperator : public Xpetra::Operator { -public: + public: // Xpetra::Operator subclasses should always define typedefs according to Xpetra - typedef typename Xpetra::Operator::node_type node_type; - typedef typename Xpetra::MultiVector MV; - typedef typename Xpetra::Map map_type; - typedef typename Xpetra::Import import_type; - typedef typename Xpetra::Export export_type; -public: + typedef typename Xpetra::Operator::node_type node_type; + typedef typename Xpetra::MultiVector MV; + typedef typename Xpetra::Map map_type; + typedef typename Xpetra::Import import_type; + typedef typename Xpetra::Export export_type; + + public: /** Constructor * \param[in] n The number of global DOFs * \param[in] comm The Teuchos::Comm for the object */ - MFProlongatorOperator(const Teuchos::RCP fine_map) - { - const GlobalOrdinal n = fine_map->getGlobalNumElements(); - const LocalOrdinal n_local = fine_map->getLocalNumElements(); - const Teuchos::RCP > comm = fine_map->getComm(); + MFProlongatorOperator(const Teuchos::RCP fine_map) { + const GlobalOrdinal n = fine_map->getGlobalNumElements(); + const LocalOrdinal n_local = fine_map->getLocalNumElements(); + const Teuchos::RCP> comm = fine_map->getComm(); TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), std::invalid_argument, "MFProlongatorOperator constructor: The input Comm object must be nonnull."); TEUCHOS_TEST_FOR_EXCEPTION(n_local % 3 != 0, std::invalid_argument, "MFProlongatorOperator constructor: The number of local DOFs is not divisible by 3."); @@ -327,16 +323,16 @@ class MFProlongatorOperator : public Xpetra::Operator::Build(Xpetra::UseTpetra, n/3, index_base, comm); + rangeMap_ = fine_map; + domainMap_ = Xpetra::MapFactory::Build(Xpetra::UseTpetra, n / 3, index_base, comm); - TEUCHOS_TEST_FOR_EXCEPTION(domainMap_->getLocalNumElements() != ((size_t) n_local)/3, std::invalid_argument, "MFProlongatorOperator constructor: The operator is not coarsening by 3."); + 
TEUCHOS_TEST_FOR_EXCEPTION(domainMap_->getLocalNumElements() != ((size_t)n_local) / 3, std::invalid_argument, "MFProlongatorOperator constructor: The operator is not coarsening by 3."); }; // Required since we inherit from Xpetra::Operator // Destructor virtual ~MFProlongatorOperator() {} - + /** * \brief Compute Y := alpha Op X + beta Y. * \param[in] X Vector to apply the operator to @@ -349,16 +345,15 @@ class MFProlongatorOperator : public Xpetra::Operator& X, Xpetra::MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero()) const - { + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const { // // Setup: get comms, ranks, procs // Teuchos::RCP > comm = domainMap_->getComm(); // const int my_rank = comm->getRank(); // const int num_procs = comm->getSize(); // const size_t num_vecs = X.getNumVectors(); // const LocalOrdinal numlocrows = static_cast(X.getLocalLength()); - + // // Make a temporary multivector for holding the redistributed data and then redistribute // Teuchos::RCP redistDataX = Teuchos::rcp(new MV(redistMap_, num_vecs)); // redistDataX->doImport(X, *importer_, Xpetra::INSERT); @@ -386,7 +381,7 @@ class MFProlongatorOperator : public Xpetra::Operator & X, - const Xpetra::MultiVector & B, - Xpetra::MultiVector & R) const { - //throw Exceptions::RuntimeError("Interface not supported"); + void residual(const Xpetra::MultiVector& X, + const Xpetra::MultiVector& B, + Xpetra::MultiVector& R) const { + // throw Exceptions::RuntimeError("Interface not supported"); } -private: + private: Teuchos::RCP domainMap_, rangeMap_, redistMap_; Teuchos::RCP importer_; }; - -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) -{ +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib lib, int argc, char* argv[]) { Teuchos::RCP> comm = 
Teuchos::DefaultComm::getComm(); - const int my_rank = comm->getRank(); - const int num_procs = comm->getSize(); + const int my_rank = comm->getRank(); + const int num_procs = comm->getSize(); { // Necessary typedefs using SC = Scalar; using LO = LocalOrdinal; using GO = GlobalOrdinal; using NO = Node; - //using map_type = Xpetra::Map<>; // unused - using MV = Xpetra::MultiVector; + // using map_type = Xpetra::Map<>; // unused + using MV = Xpetra::MultiVector; // Set a command line processor and parse it - int n = 300; - int max_iterations = 1000; - double tol = 1e-10; - bool do_multigrid = true; - bool show_timer_summary = false; - bool belos_verbose = false; - bool show_kokkos = false; + int n = 300; + int max_iterations = 1000; + double tol = 1e-10; + bool do_multigrid = true; + bool show_timer_summary = false; + bool belos_verbose = false; + bool show_kokkos = false; bool print_RHS_and_solution = false; - bool dump_matrix_market = false; + bool dump_matrix_market = false; // problem config clp.setOption("n", &n, "Size of the n-by-n operator (default: 300)"); clp.setOption("verbose", "no-verbose", &belos_verbose, "Use Belos verbose output (default: false)"); @@ -472,36 +465,36 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: - case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: + case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - if(my_rank == 0) - std::cout << "Running example-01 with n=" << n << " verbose=" << belos_verbose << " config=" << show_kokkos << "..." 
<< std::endl; + if (my_rank == 0) + std::cout << "Running example-01 with n=" << n << " verbose=" << belos_verbose << " config=" << show_kokkos << "..." << std::endl; // print configuration details if needed - Kokkos::Serial().print_configuration(std::cout, true/*details*/); - //Kokkos::OpenMP().print_configuration(std::cout, true/*details*/); - //std::cout << "OpenMP Max Threads = " << omp_get_max_threads() << std::endl; - //Kokkos::Cuda().print_configuration(std::cout, true/*details*/); - //Kokkos::HIP().print_configuration(std::cout, true/*details*/); + Kokkos::Serial().print_configuration(std::cout, true /*details*/); + // Kokkos::OpenMP().print_configuration(std::cout, true/*details*/); + // std::cout << "OpenMP Max Threads = " << omp_get_max_threads() << std::endl; + // Kokkos::Cuda().print_configuration(std::cout, true/*details*/); + // Kokkos::HIP().print_configuration(std::cout, true/*details*/); // Create the operator - Teuchos::RCP> matrix = Teuchos::rcp(new TridiagonalOperator(n, comm)); + Teuchos::RCP> matrix = Teuchos::rcp(new TridiagonalOperator(n, comm)); // A useful class name means excruciating template arguments... but I want to avoid headaches std::cout << "Creating the vectors..." 
<< std::endl; - + // Construct the right-hand side - Teuchos::RCP rhs = Xpetra::MultiVectorFactory::Build(matrix->getRangeMap(),1); + Teuchos::RCP rhs = Xpetra::MultiVectorFactory::Build(matrix->getRangeMap(), 1); rhs->putScalar(0.0); // Construct the initial guess (seedrandom is not always reproducible across machines, see Github) - Teuchos::RCP solution = Xpetra::MultiVectorFactory::Build(matrix->getDomainMap(),1); + Teuchos::RCP solution = Xpetra::MultiVectorFactory::Build(matrix->getDomainMap(), 1); Teuchos::ScalarTraits::seedrandom(314159); solution->randomize(); // solution->putScalar(1.0) is an alternative if the random seed isn't reproducible @@ -510,50 +503,51 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // f''(x) = h^2*(-pi^2)*sin(pi*x) { // divide domain up according to MPI ranks - const SC dx = 1.0/num_procs; - const SC x_left = dx*my_rank; - const SC x_right = dx*(my_rank+1); + const SC dx = 1.0 / num_procs; + const SC x_left = dx * my_rank; + const SC x_right = dx * (my_rank + 1); const size_t n_local = matrix->getRangeMap()->getLocalNumElements(); - const SC h = (x_right - x_left)/(n_local-1); + const SC h = (x_right - x_left) / (n_local - 1); // fill the RHS appropriately auto rhs_2d = rhs->getHostLocalView(Xpetra::Access::OverwriteAll); - auto rhs_1d = Kokkos::subview (rhs_2d, Kokkos::ALL(), 0); - SC x = x_left; - for(size_t i=0; i belos_settings = Teuchos::parameterList("Belos"); belos_settings->set("Verbosity", belos_verbose ? 
1 : 0); belos_settings->set("Output Style", 1); - belos_settings->set("Maximum Iterations", max_iterations); - //belos_settings->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - //belos_settings->set("Output Frequency", 1); - //belos_settings->set("Output Style", Belos::Brief); + belos_settings->set("Maximum Iterations", max_iterations); + // belos_settings->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belos_settings->set("Output Frequency", 1); + // belos_settings->set("Output Style", Belos::Brief); // Define and set the linear problem - Teuchos::RCP> belos_operator = Teuchos::rcp(new Belos::XpetraOp(matrix)); // Turns a Xpetra::Operator object into a Belos operator - Teuchos::RCP>> belos_problem = Teuchos::rcp(new Belos::LinearProblem>(belos_operator, solution, rhs)); - bool set = belos_problem->setProblem(); + Teuchos::RCP> belos_operator = Teuchos::rcp(new Belos::XpetraOp(matrix)); // Turns a Xpetra::Operator object into a Belos operator + Teuchos::RCP>> belos_problem = Teuchos::rcp(new Belos::LinearProblem>(belos_operator, solution, rhs)); + bool set = belos_problem->setProblem(); if (set == false) { if (comm->getRank() == 0) - std::cout << std::endl << "ERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; + std::cout << std::endl + << "ERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; return EXIT_FAILURE; } // Solve the problem std::cout << "Solving the problem..." << std::endl; - Teuchos::RCP>> solver = Teuchos::rcp(new Belos::BlockCGSolMgr>(belos_problem, belos_settings)); - const Belos::ReturnType belos_result = solver->solve(); + Teuchos::RCP>> solver = Teuchos::rcp(new Belos::BlockCGSolMgr>(belos_problem, belos_settings)); + const Belos::ReturnType belos_result = solver->solve(); - if(my_rank == 0) { + if (my_rank == 0) { std::cout << "Belos solver wrapper results: " << (belos_result == Belos::Converged ? 
"Converged" : "Unconverged") << std::endl @@ -567,30 +561,30 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg params.set("coarse: max size", 1); params.set("max levels", 2); params.set("transpose: use implicit", true); - + // generate coarse matrix-free operator - Teuchos::RCP> coarse_matrix = Teuchos::rcp(new TridiagonalOperator(n/3, comm)); - Teuchos::RCP> P = Teuchos::rcp(new MFProlongatorOperator(matrix->getDomainMap())); + Teuchos::RCP> coarse_matrix = Teuchos::rcp(new TridiagonalOperator(n / 3, comm)); + Teuchos::RCP> P = Teuchos::rcp(new MFProlongatorOperator(matrix->getDomainMap())); // create MueLu hierarchy and levels - Teuchos::RCP> hierarchy = Teuchos::rcp(new typename MueLu::Hierarchy()); + Teuchos::RCP> hierarchy = Teuchos::rcp(new typename MueLu::Hierarchy()); hierarchy->SetProcRankVerbose(matrix->getDomainMap()->getComm()->getRank()); - Teuchos::RCP> hierarchyManager = Teuchos::rcp(new MueLu::ParameterListInterpreter(params,matrix->getDomainMap()->getComm())); - Teuchos::RCP> factoryManager = Teuchos::rcp(new MueLu::FactoryManager()); - + Teuchos::RCP> hierarchyManager = Teuchos::rcp(new MueLu::ParameterListInterpreter(params, matrix->getDomainMap()->getComm())); + Teuchos::RCP> factoryManager = Teuchos::rcp(new MueLu::FactoryManager()); + // set A on fine level Teuchos::RCP fineLevel = hierarchy->GetLevel(0); fineLevel->SetFactoryManager(factoryManager); - Teuchos::RCP> matrix_op = matrix; + Teuchos::RCP> matrix_op = matrix; fineLevel->Set("A", matrix_op); - + // handle the smoother std::string ifpackType = "RELAXATION"; Teuchos::ParameterList ifpackList; - ifpackList.set("relaxation: sweeps", (LO) 1); - ifpackList.set("relaxation: damping factor", (SC) 1.0); - Teuchos::RCP> smootherPrototype = Teuchos::rcp(new MueLu::TrilinosSmoother(ifpackType, ifpackList)); - Teuchos::RCP> smootherFact = Teuchos::rcp(new MueLu::SmootherFactory(smootherPrototype)); + ifpackList.set("relaxation: sweeps", (LO)1); + 
ifpackList.set("relaxation: damping factor", (SC)1.0); + Teuchos::RCP> smootherPrototype = Teuchos::rcp(new MueLu::TrilinosSmoother(ifpackType, ifpackList)); + Teuchos::RCP> smootherFact = Teuchos::rcp(new MueLu::SmootherFactory(smootherPrototype)); factoryManager->SetFactory("Smoother", smootherFact); // work on the next level @@ -600,38 +594,37 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // coarseLevel->Set("A", coarse_matrix); // hierarchyManager->SetupHierarchy(*hierarchy); - //hierarchy->Setup(factoryManager,fineLevel,coarseLevel); - //hierarchy->setlib(matrix->getDomainMap()->lib()); - //hierarchyManager->SetupHierarchy(*hierarchy); + // hierarchy->Setup(factoryManager,fineLevel,coarseLevel); + // hierarchy->setlib(matrix->getDomainMap()->lib()); + // hierarchyManager->SetupHierarchy(*hierarchy); std::cout << "Finished hierarchy!" << std::endl; Teuchos::ParameterList status; - //status = hierarchy->FullPopulate(PRfact,Acfact,SmooFact,0,maxLevels); + // status = hierarchy->FullPopulate(PRfact,Acfact,SmooFact,0,maxLevels); if (comm->getRank() == 0) { - std::cout << "======================\n Multigrid statistics \n======================" << std::endl; + std::cout << "======================\n Multigrid statistics \n======================" << std::endl; status.print(std::cout, Teuchos::ParameterList::PrintOptions().indent(2)); } - //hierarchy->Iterate(*rhs, *solution, max_iterations); + // hierarchy->Iterate(*rhs, *solution, max_iterations); } // output the RHS and solution for validation // (sleep my_rank is very hacky here, but keeps prints contiguous) - if(print_RHS_and_solution) { + if (print_RHS_and_solution) { sleep(my_rank); auto rhs_2d = rhs->getHostLocalView(Xpetra::Access::ReadOnly); PRINT_VIEW2_LINEAR(rhs_2d) auto solution_2d = solution->getHostLocalView(Xpetra::Access::ReadOnly); - PRINT_VIEW2_LINEAR(solution_2d) + PRINT_VIEW2_LINEAR(solution_2d) } - + // can't dump MatrixMarket if it's not a matrix - 
if(dump_matrix_market) { - //Xpetra::MatrixMarket::Writer::writeDenseFile("example_01_solution.mm", *solution); - //Xpetra::MatrixMarket::Writer::writeDenseFile("example_01_rhs.mm", *rhs); + if (dump_matrix_market) { + // Xpetra::MatrixMarket::Writer::writeDenseFile("example_01_solution.mm", *solution); + // Xpetra::MatrixMarket::Writer::writeDenseFile("example_01_rhs.mm", *rhs); } - } Kokkos::finalize(); return 0; @@ -641,6 +634,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" -int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); +int main(int argc, char* argv[]) { + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/research/luc/region_algorithms/Driver.cpp b/packages/muelu/research/luc/region_algorithms/Driver.cpp index 5a903f5ca8ea..6e9251a3dac0 100644 --- a/packages/muelu/research/luc/region_algorithms/Driver.cpp +++ b/packages/muelu/research/luc/region_algorithms/Driver.cpp @@ -50,7 +50,7 @@ #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include #include #include @@ -65,25 +65,27 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp #endif -template +template void createTwoLevelHierarchy(MueLu::Level& fineLevel, MueLu::Level& coarseLevel, Teuchos::RCP > A); -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib lib, int argc, char* argv[]) { #include - using Teuchos::Array; using Teuchos::ArrayView; - using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::tuple; + using Teuchos::Array; + using Teuchos::ArrayView; + using Teuchos::RCP; + using 
Teuchos::rcp; using Teuchos::TimeMonitor; + using Teuchos::tuple; - typedef Tpetra::Map map_type; - typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::Map map_type; + typedef Tpetra::CrsMatrix crs_matrix_type; typedef typename crs_matrix_type::scalar_type scalar_type; typedef typename crs_matrix_type::local_ordinal_type local_ordinal_type; @@ -95,8 +97,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - const local_ordinal_type myRank = comm->getRank(); + RCP > comm = Teuchos::DefaultComm::getComm(); + const local_ordinal_type myRank = comm->getRank(); // Manage the way output stream works RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); @@ -111,24 +113,25 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Parameters initialization // ========================================================================= - //GO nx = 100, ny = 100, nz = 100; - //Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + // GO nx = 100, ny = 100, nz = 100; + // Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - std::string xmlFileName = "driver.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'scalingTest.xml'"); + std::string xmlFileName = "driver.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default 'scalingTest.xml'"); - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } global_ordinal_type gNumCompFineGIDs = 10, lCompFineGIDOffset = 0; - global_ordinal_type lRegFineGIDOffset = 0; // gNumRegFineGIDs = 11, - local_ordinal_type lNumCompFineGIDs = 0, lNumRegFineGIDs = 0; - local_ordinal_type lNumRegCoarseGIDs = 0; // lNumCompCoarseGIDs = 0, - if(myRank == 0) { + global_ordinal_type lRegFineGIDOffset = 0; // gNumRegFineGIDs = 11, + local_ordinal_type lNumCompFineGIDs = 0, lNumRegFineGIDs = 0; + local_ordinal_type lNumRegCoarseGIDs = 0; // lNumCompCoarseGIDs = 0, + if (myRank == 0) { lNumCompFineGIDs = 7; lNumRegFineGIDs = 7; @@ -136,8 +139,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg lRegFineGIDOffset = 0; // lNumCompCoarseGIDs = 3; - lNumRegCoarseGIDs = 3; - } else if(myRank == 1) { + lNumRegCoarseGIDs = 3; + } else if (myRank == 1) { lNumCompFineGIDs = 3; lNumRegFineGIDs = 4; @@ -145,13 +148,13 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg lRegFineGIDOffset = 6; // lNumCompCoarseGIDs = 1; - lNumRegCoarseGIDs = 2; + lNumRegCoarseGIDs = 2; } // The initial focus is on getting rowMaps as they are also used as rangeMap and potentially // domainMaps... 
Array fineCompRowGIDs(lNumCompFineGIDs); - for(local_ordinal_type dof = 0; dof < lNumCompFineGIDs; ++dof) { + for (local_ordinal_type dof = 0; dof < lNumCompFineGIDs; ++dof) { fineCompRowGIDs[dof] = lCompFineGIDOffset + dof; } out << "fineCompRowGIDs: " << fineCompRowGIDs << std::endl; @@ -159,12 +162,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Now the columnMap for the initial composite operator is needed Array fineCompColGIDs(lNumCompFineGIDs + 1); - if(myRank == 0) { - for(local_ordinal_type dof = 0; dof < lNumCompFineGIDs + 1; ++dof) { + if (myRank == 0) { + for (local_ordinal_type dof = 0; dof < lNumCompFineGIDs + 1; ++dof) { fineCompColGIDs[dof] = lCompFineGIDOffset + dof; } - } else if(myRank == 1) { - for(local_ordinal_type dof = 0; dof < lNumCompFineGIDs + 1; ++dof) { + } else if (myRank == 1) { + for (local_ordinal_type dof = 0; dof < lNumCompFineGIDs + 1; ++dof) { fineCompColGIDs[dof] = lCompFineGIDOffset - 1 + dof; } } @@ -172,40 +175,41 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP fineCompColMap = rcp(new map_type(12, fineCompColGIDs, 0, comm)); // Create the matrix using our maps and assuming at most 3 entries per row - RCP compA (new crs_matrix_type (fineCompRowMap, fineCompColMap, 3)); + RCP compA(new crs_matrix_type(fineCompRowMap, fineCompColMap, 3)); // Now each row needs to be filled - const scalar_type one = static_cast (1.0); - const scalar_type two = static_cast (2.0); - const scalar_type negOne = static_cast (-1.0); - for(local_ordinal_type lclRow = 0; lclRow < static_cast(lNumCompFineGIDs); ++lclRow) { - const global_ordinal_type gblRow = fineCompRowMap->getGlobalElement (lclRow); + const scalar_type one = static_cast(1.0); + const scalar_type two = static_cast(2.0); + const scalar_type negOne = static_cast(-1.0); + for (local_ordinal_type lclRow = 0; lclRow < static_cast(lNumCompFineGIDs); ++lclRow) { + const global_ordinal_type gblRow = 
fineCompRowMap->getGlobalElement(lclRow); // A(0, 0) = [1] if (gblRow == 0) { - compA->insertGlobalValues (gblRow, - tuple (gblRow), - tuple (one)); + compA->insertGlobalValues(gblRow, + tuple(gblRow), + tuple(one)); } // A(N-1, N-2:N-1) = [-1, 1] else if (gblRow == gNumCompFineGIDs - 1) { - compA->insertGlobalValues (gblRow, - tuple (gblRow - 1, gblRow), - tuple (negOne, one)); + compA->insertGlobalValues(gblRow, + tuple(gblRow - 1, gblRow), + tuple(negOne, one)); } // A(i, i-1:i+1) = [-1, 2, -1] else { - compA->insertGlobalValues (gblRow, - tuple (gblRow - 1, gblRow, gblRow + 1), - tuple (negOne, two, negOne)); + compA->insertGlobalValues(gblRow, + tuple(gblRow - 1, gblRow, gblRow + 1), + tuple(negOne, two, negOne)); } } - compA->fillComplete (fineCompRowMap, fineCompRowMap); + compA->fillComplete(fineCompRowMap, fineCompRowMap); // compA->print(out); - out << std::endl << "Now switching to the region matrix" << std::endl; + out << std::endl + << "Now switching to the region matrix" << std::endl; - Array fineRegRowGIDs (lNumRegFineGIDs); - for(local_ordinal_type dof = 0; dof < lNumRegFineGIDs; ++dof) { + Array fineRegRowGIDs(lNumRegFineGIDs); + for (local_ordinal_type dof = 0; dof < lNumRegFineGIDs; ++dof) { fineRegRowGIDs[dof] = (lRegFineGIDOffset + dof == 6 && myRank == 1) ? 10 : lRegFineGIDOffset + dof; } out << "fineRegRowGIDs: " << fineRegRowGIDs << std::endl; @@ -213,116 +217,119 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Create the matrix using the simplest constructor as we assume that the region matrix // has now off processor entries, hence no column map is required. - RCP regA (new crs_matrix_type (fineRegRowMap, 3)); + RCP regA(new crs_matrix_type(fineRegRowMap, 3)); // Now each row needs to be filled - for(local_ordinal_type lclRow = 0; lclRow < static_cast(lNumRegFineGIDs); ++lclRow) { - const global_ordinal_type prevGblRow = (lclRow > 0) ? 
fineRegRowMap->getGlobalElement (lclRow - 1) : -1; - const global_ordinal_type curGblRow = fineRegRowMap->getGlobalElement (lclRow); - const global_ordinal_type nextGblRow = (lclRow < lNumRegFineGIDs - 1) ? fineRegRowMap->getGlobalElement (lclRow + 1) : -1; + for (local_ordinal_type lclRow = 0; lclRow < static_cast(lNumRegFineGIDs); ++lclRow) { + const global_ordinal_type prevGblRow = (lclRow > 0) ? fineRegRowMap->getGlobalElement(lclRow - 1) : -1; + const global_ordinal_type curGblRow = fineRegRowMap->getGlobalElement(lclRow); + const global_ordinal_type nextGblRow = (lclRow < lNumRegFineGIDs - 1) ? fineRegRowMap->getGlobalElement(lclRow + 1) : -1; // A(0, 0) = [1] if (lclRow == 0) { - if (curGblRow == 0) { // Apply Dirichlet BC to the left of the mesh - regA->insertGlobalValues (curGblRow, - tuple (curGblRow), - tuple (one)); - } else { // Here I hard code the column associated with "-1" because I'm lazy... - regA->insertGlobalValues (curGblRow, - tuple (curGblRow, nextGblRow), - tuple (one, negOne)); + if (curGblRow == 0) { // Apply Dirichlet BC to the left of the mesh + regA->insertGlobalValues(curGblRow, + tuple(curGblRow), + tuple(one)); + } else { // Here I hard code the column associated with "-1" because I'm lazy... 
+ regA->insertGlobalValues(curGblRow, + tuple(curGblRow, nextGblRow), + tuple(one, negOne)); } } // A(N-1, N-2:N-1) = [-1, 1] else if (lclRow == static_cast(lNumRegFineGIDs - 1)) { - regA->insertGlobalValues (curGblRow, - tuple (prevGblRow, curGblRow), - tuple (negOne, one)); + regA->insertGlobalValues(curGblRow, + tuple(prevGblRow, curGblRow), + tuple(negOne, one)); } // A(i, i-1:i+1) = [-1, 2, -1] else { - regA->insertGlobalValues (curGblRow, - tuple (prevGblRow, curGblRow, nextGblRow), - tuple (negOne, two, negOne)); + regA->insertGlobalValues(curGblRow, + tuple(prevGblRow, curGblRow, nextGblRow), + tuple(negOne, two, negOne)); } } - regA->fillComplete (); + regA->fillComplete(); Tpetra::MatrixMarket::Writer:: - writeSparseFile("regA.m", regA, "regA", - "region representation of the operator."); + writeSparseFile("regA.m", regA, "regA", + "region representation of the operator."); - out << std::endl << "Forming the prolongator" << std::endl; + out << std::endl + << "Forming the prolongator" << std::endl; MueLu::Level fineLevel, coarseLevel; - RCP > matA = MueLu::TpetraCrs_To_XpetraMatrix(regA); - createTwoLevelHierarchy(fineLevel, coarseLevel, matA); + RCP > matA = MueLu::TpetraCrs_To_XpetraMatrix(regA); + createTwoLevelHierarchy(fineLevel, coarseLevel, matA); // Now the prolongator needs to be created - Array coarseRegGIDs (lNumRegCoarseGIDs); - if(myRank == 0) { + Array coarseRegGIDs(lNumRegCoarseGIDs); + if (myRank == 0) { coarseRegGIDs[0] = 0; coarseRegGIDs[1] = 3; coarseRegGIDs[2] = 6; - } else if(myRank == 1) { + } else if (myRank == 1) { coarseRegGIDs[0] = 10; coarseRegGIDs[1] = 9; } out << "coarseRegGIDs: " << coarseRegGIDs << std::endl; RCP colMapP = rcp(new map_type(5, coarseRegGIDs, 0, comm)); // RCP regP (new crs_matrix_type (fineRegRowMap, 1)); - RCP regP (new crs_matrix_type (fineRegRowMap, colMapP, 1)); + RCP regP(new crs_matrix_type(fineRegRowMap, colMapP, 1)); // Now each row needs to be filled - for(local_ordinal_type lclRow = 0; lclRow < 
static_cast(lNumRegFineGIDs); ++lclRow) { - const global_ordinal_type gblRow = fineRegRowMap->getGlobalElement (lclRow); - const global_ordinal_type gblCol = colMapP->getGlobalElement ((lclRow + 1) / 3); - regP->insertGlobalValues (gblRow, - tuple (gblCol), - tuple (one)); + for (local_ordinal_type lclRow = 0; lclRow < static_cast(lNumRegFineGIDs); ++lclRow) { + const global_ordinal_type gblRow = fineRegRowMap->getGlobalElement(lclRow); + const global_ordinal_type gblCol = colMapP->getGlobalElement((lclRow + 1) / 3); + regP->insertGlobalValues(gblRow, + tuple(gblCol), + tuple(one)); } - regP->fillComplete (); + regP->fillComplete(); Tpetra::MatrixMarket::Writer:: - writeSparseFile("regP.m", regP, "regP", - "region representation of the prolongator."); + writeSparseFile("regP.m", regP, "regP", + "region representation of the prolongator."); - out << std::endl << "Computing AP = AxP" << std::endl; + out << std::endl + << "Computing AP = AxP" << std::endl; // The number of non-zeros per row is set to zero because it's hard to guess what it will be... - RCP regAP = rcp( new crs_matrix_type(fineRegRowMap, 0)); + RCP regAP = rcp(new crs_matrix_type(fineRegRowMap, 0)); Tpetra::MatrixMatrix:: - Multiply(*regA, false, *regP, - false, *regAP); + Multiply(*regA, false, *regP, + false, *regAP); Tpetra::MatrixMarket::Writer:: - writeSparseFile("regAP.m", regAP, "regAP", - "region representation of AP."); + writeSparseFile("regAP.m", regAP, "regAP", + "region representation of AP."); - out << std::endl << "Computing Ac = P'xAP" << std::endl; + out << std::endl + << "Computing Ac = P'xAP" << std::endl; // The number of non-zeros per row is set to zero because it's hard to guess what it will be... 
- RCP regAc = rcp( new crs_matrix_type(regP->getDomainMap(), 0)); + RCP regAc = rcp(new crs_matrix_type(regP->getDomainMap(), 0)); Tpetra::MatrixMatrix:: - Multiply(*regP, true, *regAP, - false, *regAc); + Multiply(*regP, true, *regAP, + false, *regAc); Tpetra::MatrixMarket::Writer:: - writeSparseFile("regAc.m", regAc, "regAc", - "region representation of Ac."); + writeSparseFile("regAc.m", regAc, "regAc", + "region representation of Ac."); success = true; } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); -} //main - + return (success ? EXIT_SUCCESS : EXIT_FAILURE); +} // main -template +template void createTwoLevelHierarchy(MueLu::Level& fineLevel, MueLu::Level& coarseLevel, Teuchos::RCP > A) { #include - using Teuchos::RCP; using Teuchos::rcp; + using Teuchos::RCP; + using Teuchos::rcp; RCP factoryHandler = rcp(new FactoryManager()); fineLevel.SetFactoryManager(factoryHandler); coarseLevel.SetFactoryManager(factoryHandler); @@ -338,14 +345,14 @@ void createTwoLevelHierarchy(MueLu::Level& fineLevel, MueLu::Level& coarseLevel, A->SetFixedBlockSize(1); fineLevel.Request("A"); fineLevel.Set("A", A); - fineLevel.Set("DofsPerNode",2); + fineLevel.Set("DofsPerNode", 2); RCP nullSpace = MultiVectorFactory::Build(A->getRowMap(), 1); nullSpace->randomize(); - fineLevel.Set("Nullspace",nullSpace); + fineLevel.Set("Nullspace", nullSpace); RCP amalgFact = rcp(new AmalgamationFactory()); - RCP dropFact = rcp(new CoalesceDropFactory()); + RCP dropFact = rcp(new CoalesceDropFactory()); dropFact->SetFactory("UnAmalgamationInfo", amalgFact); RCP UnCoupledAggFact = rcp(new UncoupledAggregationFactory()); UnCoupledAggFact->SetFactory("Graph", dropFact); @@ -357,22 +364,22 @@ void createTwoLevelHierarchy(MueLu::Level& fineLevel, MueLu::Level& coarseLevel, TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact); TentativePFact->SetFactory("CoarseMap", coarseMapFact); - 
coarseLevel.Request("P",TentativePFact.get()); // request Ptent - coarseLevel.Request("Nullspace",TentativePFact.get()); + coarseLevel.Request("P", TentativePFact.get()); // request Ptent + coarseLevel.Request("Nullspace", TentativePFact.get()); coarseLevel.Request(*TentativePFact); Teuchos::ParameterList paramList; - paramList.set("tentative: calculate qr", false); + paramList.set("tentative: calculate qr", false); TentativePFact->SetParameterList(paramList); - TentativePFact->Build(fineLevel,coarseLevel); + TentativePFact->Build(fineLevel, coarseLevel); RCP Ptent; - coarseLevel.Get("P",Ptent,TentativePFact.get()); + coarseLevel.Get("P", Ptent, TentativePFact.get()); } //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" -int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); +int main(int argc, char* argv[]) { + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/research/luc/region_algorithms/Driver_Structured_Regions.cpp b/packages/muelu/research/luc/region_algorithms/Driver_Structured_Regions.cpp index 5f08211d71c3..a60731e64a4e 100644 --- a/packages/muelu/research/luc/region_algorithms/Driver_Structured_Regions.cpp +++ b/packages/muelu/research/luc/region_algorithms/Driver_Structured_Regions.cpp @@ -84,12 +84,11 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp -#include // => This header defines Belos::TpetraOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::TpetraOp #endif - #ifdef HAVE_MUELU_CUDA #include "cuda_profiler_api.h" #endif @@ -108,21 +107,20 @@ #include - -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, 
int argc, char *argv[]) { #include + using Teuchos::ArrayRCP; + using Teuchos::ParameterList; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; - using Teuchos::ParameterList; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= RCP > comm = Teuchos::DefaultComm::getComm(); - const int myRank = comm->getRank(); + const int myRank = comm->getRank(); // ========================================================================= // Convenient definitions @@ -130,34 +128,44 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar typedef Teuchos::ScalarTraits STS; SC zero = STS::zero(), one = STS::one(); typedef typename STS::magnitudeType real_type; - typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Xpetra::MultiVector RealValuedMultiVector; // ========================================================================= // Parameters initialization // ========================================================================= GO nx = 10, ny = 10, nz = 10; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = ""; clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); - std::string yamlFileName = ""; clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); - int maxIts = 200; clp.setOption("its", &maxIts, "maximum number of solver iterations"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - bool scaleResidualHist = true; clp.setOption("scale", "noscale", &scaleResidualHist, "scaled Krylov residual history"); - bool solvePreconditioned = true; clp.setOption("solve-preconditioned","no-solve-preconditioned", &solvePreconditioned, 
"use MueLu preconditioner in solve"); - std::string equilibrate = "no" ; clp.setOption("equilibrate", &equilibrate, "equilibrate the system (no | diag | 1-norm)"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = ""; + clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); + std::string yamlFileName = ""; + clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); + int maxIts = 200; + clp.setOption("its", &maxIts, "maximum number of solver iterations"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + bool scaleResidualHist = true; + clp.setOption("scale", "noscale", &scaleResidualHist, "scaled Krylov residual history"); + bool solvePreconditioned = true; + clp.setOption("solve-preconditioned", "no-solve-preconditioned", &solvePreconditioned, "use MueLu preconditioner in solve"); + std::string equilibrate = "no"; + clp.setOption("equilibrate", &equilibrate, "equilibrate the system (no | diag | 1-norm)"); #ifdef HAVE_MUELU_CUDA - bool profileSetup = false; clp.setOption("cuda-profile-setup", "no-cuda-profile-setup", &profileSetup, "enable CUDA profiling for setup"); - bool profileSolve = false; clp.setOption("cuda-profile-solve", "no-cuda-profile-solve", &profileSolve, "enable CUDA profiling for solve"); + bool profileSetup = false; + clp.setOption("cuda-profile-setup", "no-cuda-profile-setup", &profileSetup, "enable CUDA profiling for setup"); + bool profileSolve = false; + clp.setOption("cuda-profile-solve", "no-cuda-profile-solve", &profileSolve, "enable CUDA profiling for solve"); #endif - int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); + int cacheSize = 0; + clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { - case 
Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } TEUCHOS_TEST_FOR_EXCEPTION(xmlFileName != "" && yamlFileName != "", std::runtime_error, @@ -165,7 +173,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream &out = *fancy; out.setOutputToRootOnly(0); ParameterList paramList; @@ -181,7 +189,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *comm); } - // Retrieve matrix parameters (they may have been changed on the command line) // [for instance, if we changed matrix type from 2D to 3D we need to update nz] ParameterList galeriList = galeriParameters.GetParameterList(); @@ -192,25 +199,24 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar std::ostringstream galeriStream; #ifdef HAVE_MUELU_OPENMP std::string node_name = Node::name(); - if(!comm->getRank() && !node_name.compare("OpenMP/Wrapper")) - galeriStream<<"OpenMP Max Threads = "<getRank() && !node_name.compare("OpenMP/Wrapper")) + galeriStream << "OpenMP Max Threads = " << omp_get_max_threads() << std::endl; #endif - comm->barrier(); Teuchos::TimeMonitor::setStackedTimer(Teuchos::null); RCP globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: S - Global Time"))); RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1 - Matrix Build"))); - - RCP A; - 
RCP map; + RCP A; + RCP map; RCP coordinates; typedef typename RealValuedMultiVector::scalar_type Real; - RCP > nullspace; + RCP > nullspace; RCP X, B; - galeriStream << "========================================================\n" << xpetraParameters << galeriParameters; + galeriStream << "========================================================\n" + << xpetraParameters << galeriParameters; // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g., // d1 d2 d3 @@ -222,8 +228,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // size. For example, np=14 will give a 7-by-2 distribution. // If you don't want Galeri to do this, specify mx or my on the galeriList. std::string matrixType = galeriParameters.GetMatrixType(); - int numDimensions = 0; - int numDofsPerNode = 1; + int numDimensions = 0; + int numDofsPerNode = 1; Teuchos::Array procsPerDim(3); Teuchos::Array gNodesPerDim(3); Teuchos::Array lNodesPerDim(3); @@ -233,26 +239,26 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside if (matrixType == "Laplace1D") { numDimensions = 1; - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { numDimensions = 2; - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + map = 
Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { numDimensions = 3; - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); } // Expand map to do multiple DOF per node for block problems if (matrixType == "Elasticity2D") - map = Xpetra::MapFactory::Build(map, 2); + map = Xpetra::MapFactory::Build(map, 2); if (matrixType == "Elasticity3D") - map = Xpetra::MapFactory::Build(map, 3); + map = Xpetra::MapFactory::Build(map, 3); galeriStream << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl @@ -261,20 +267,20 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. 
except left which has Dirichlet - galeriList.set("right boundary" , "Neumann"); + galeriList.set("right boundary", "Neumann"); galeriList.set("bottom boundary", "Neumann"); - galeriList.set("top boundary" , "Neumann"); - galeriList.set("front boundary" , "Neumann"); - galeriList.set("back boundary" , "Neumann"); + galeriList.set("top boundary", "Neumann"); + galeriList.set("front boundary", "Neumann"); + galeriList.set("back boundary", "Neumann"); } - RCP > Pr = - Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), map, galeriList); + RCP > Pr = + Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), map, galeriList); A = Pr->BuildMatrix(); - if(matrixType == "Elasticity2D") { + if (matrixType == "Elasticity2D") { numDofsPerNode = 2; - } else if(matrixType == "Elasticity3D") { + } else if (matrixType == "Elasticity3D") { numDofsPerNode = 3; } @@ -290,7 +296,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar Teuchos::Array norms(1); B->norm2(norms); - B->scale(one/norms[0]); + B->scale(one / norms[0]); galeriStream << "Galeri complete.\n========================================================" << std::endl; out << galeriStream.str(); @@ -301,7 +307,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 2 - Compute region data"))); // Loading geometric info from galeri - if(numDimensions == 1) { + if (numDimensions == 1) { gNodesPerDim[0] = galeriList.get("nx"); gNodesPerDim[1] = 1; gNodesPerDim[2] = 1; @@ -313,7 +319,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar procsPerDim[0] = galeriList.get("mx"); procsPerDim[1] = 1; procsPerDim[2] = 1; - } else if(numDimensions == 2) { + } else if (numDimensions == 2) { gNodesPerDim[0] = galeriList.get("nx"); gNodesPerDim[1] = galeriList.get("ny"); gNodesPerDim[2] = 1; @@ -325,7 +331,7 @@ int main_(Teuchos::CommandLineProcessor &clp, 
Xpetra::UnderlyingLib& lib, int ar procsPerDim[0] = galeriList.get("mx"); procsPerDim[1] = galeriList.get("my"); procsPerDim[2] = 1; - } else if(numDimensions == 3) { + } else if (numDimensions == 3) { gNodesPerDim[0] = galeriList.get("nx"); gNodesPerDim[1] = galeriList.get("ny"); gNodesPerDim[2] = galeriList.get("nz"); @@ -342,22 +348,34 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar Teuchos::Array startIndices(3); Teuchos::Array endIndices(3); const GO startGID = map->getMinGlobalIndex(); - startIndices[2] = startGID / (gNodesPerDim[1]*gNodesPerDim[0]); - const GO rem = startGID % (gNodesPerDim[1]*gNodesPerDim[0]); - startIndices[1] = rem / gNodesPerDim[0]; - startIndices[0] = rem % gNodesPerDim[0]; - endIndices[0] = startIndices[0] + lNodesPerDim[0] - 1; - endIndices[1] = startIndices[1] + lNodesPerDim[1] - 1; - endIndices[2] = startIndices[2] + lNodesPerDim[2] - 1; + startIndices[2] = startGID / (gNodesPerDim[1] * gNodesPerDim[0]); + const GO rem = startGID % (gNodesPerDim[1] * gNodesPerDim[0]); + startIndices[1] = rem / gNodesPerDim[0]; + startIndices[0] = rem % gNodesPerDim[0]; + endIndices[0] = startIndices[0] + lNodesPerDim[0] - 1; + endIndices[1] = startIndices[1] + lNodesPerDim[1] - 1; + endIndices[2] = startIndices[2] + lNodesPerDim[2] - 1; int leftBC = 0, rightBC = 0, frontBC = 0, backBC = 0, bottomBC = 0, topBC = 0; - if(startIndices[0] == 0) {leftBC = 1;} - if(startIndices[1] == 0) {frontBC = 1;} - if(startIndices[2] == 0) {bottomBC = 1;} + if (startIndices[0] == 0) { + leftBC = 1; + } + if (startIndices[1] == 0) { + frontBC = 1; + } + if (startIndices[2] == 0) { + bottomBC = 1; + } - if(endIndices[0] == gNodesPerDim[0] - 1) {rightBC = 1;} - if(endIndices[1] == gNodesPerDim[1] - 1) {backBC = 1;} - if(endIndices[2] == gNodesPerDim[2] - 1) {topBC = 1;} + if (endIndices[0] == gNodesPerDim[0] - 1) { + rightBC = 1; + } + if (endIndices[1] == gNodesPerDim[1] - 1) { + backBC = 1; + } + if (endIndices[2] == gNodesPerDim[2] 
- 1) { + topBC = 1; + } std::cout << "p=" << myRank << " | startGID= " << startGID << ", startIndices: " << startIndices @@ -374,12 +392,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // First we count how many nodes the region needs to send and receive // and allocate arrays accordingly LO numReceive = 0, numSend = 0; - Teuchos::Array receiveGIDs; + Teuchos::Array receiveGIDs; Teuchos::Array receivePIDs; - Teuchos::Array sendGIDs; + Teuchos::Array sendGIDs; Teuchos::Array sendPIDs; - if(numDimensions == 1) { - if(leftBC == 0) { + if (numDimensions == 1) { + if (leftBC == 0) { numReceive = 1; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); @@ -387,7 +405,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar receiveGIDs[0] = startIndices[0] - 1; receivePIDs[0] = myRank - 1; } - if(rightBC == 0) { + if (rightBC == 0) { numSend = 1; sendGIDs.resize(numSend); sendPIDs.resize(numSend); @@ -395,9 +413,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar sendGIDs[0] = endIndices[0]; sendGIDs[0] = myRank + 1; } - } else if(numDimensions == 2) { + } else if (numDimensions == 2) { // Received nodes - if(frontBC == 0 && leftBC == 0) { + if (frontBC == 0 && leftBC == 0) { numReceive = lNodesPerDim[0] + lNodesPerDim[1] + 1; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); @@ -408,67 +426,67 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; ++countIDs; // Receive front edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } // Receive left edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - 1 + j*gNodesPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + 
receiveGIDs[countIDs] = startGID - 1 + j * gNodesPerDim[0]; receivePIDs[countIDs] = myRank - 1; ++countIDs; } - } else if(frontBC == 0) { + } else if (frontBC == 0) { numReceive = lNodesPerDim[0]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive front edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } - } else if(leftBC == 0) { + } else if (leftBC == 0) { numReceive = lNodesPerDim[1]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive left edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - 1 + j*gNodesPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - 1 + j * gNodesPerDim[0]; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } // Sent nodes - if(rightBC == 0 && backBC == 0) { + if (rightBC == 0 && backBC == 0) { numSend = lNodesPerDim[0] + lNodesPerDim[1] + 1; sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } // Send nodes of back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } // Send node of back-right corner - sendGIDs[countIDs] = startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendGIDs[countIDs] = startGID + 
(lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + procsPerDim[1] + 1; ++countIDs; - } else if(backBC == 0) { + } else if (backBC == 0) { numSend = lNodesPerDim[0]; sendGIDs.resize(numSend); @@ -476,460 +494,406 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar LO countIDs = 0; // Send nodes of back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } - } else if(rightBC == 0) { + } else if (rightBC == 0) { numSend = lNodesPerDim[1]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } - } else if(numDimensions == 3) { + } else if (numDimensions == 3) { // Received nodes - if( (bottomBC == 0) && (frontBC == 0) && (leftBC == 0) ) { - numReceive = lNodesPerDim[0]*lNodesPerDim[1] // bottom face - + (lNodesPerDim[0] + 1)*lNodesPerDim[2] // front face - + (lNodesPerDim[1] + 1)*(lNodesPerDim[2] + 1); // left face + if ((bottomBC == 0) && (frontBC == 0) && (leftBC == 0)) { + numReceive = lNodesPerDim[0] * lNodesPerDim[1] // bottom face + + (lNodesPerDim[0] + 1) * lNodesPerDim[2] // front face + + (lNodesPerDim[1] + 1) * (lNodesPerDim[2] + 1); // left face receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive front-left-bottom corner node - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - 1 - - gNodesPerDim[1]*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0] - 
1 - - procsPerDim[1]*procsPerDim[0]; + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - 1 - gNodesPerDim[1] * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] - 1 - procsPerDim[1] * procsPerDim[0]; ++countIDs; // Receive front-bottom edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - - gNodesPerDim[0] + i; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1] - procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] - gNodesPerDim[0] + i; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1] - procsPerDim[0]; ++countIDs; } // Receive left-bottom edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - - 1 + j*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] - 1 + j * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1] - 1; ++countIDs; } // Receive bottom face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - + i - + j*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] + i + j * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1]; ++countIDs; } } // Receive front-left edge nodes - for(LO k = 0; k < lNodesPerDim[1]; ++k) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - - 1 + k*gNodesPerDim[0]*gNodesPerDim[1]; + for (LO k = 0; k < lNodesPerDim[1]; ++k) { + receiveGIDs[countIDs] = 
startGID - gNodesPerDim[0] - 1 + k * gNodesPerDim[0] * gNodesPerDim[1]; receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; ++countIDs; } // Receive front face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i - + k*(gNodesPerDim[1]*gNodesPerDim[0]); + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i + k * (gNodesPerDim[1] * gNodesPerDim[0]); receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } } // Receive left face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - 1 - + j*gNodesPerDim[0] - + k*(gNodesPerDim[1]*gNodesPerDim[0]); + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - 1 + j * gNodesPerDim[0] + k * (gNodesPerDim[1] * gNodesPerDim[0]); receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - // Two faces received - } else if( (bottomBC == 0) && (frontBC == 0) ) { - numReceive = lNodesPerDim[0]*lNodesPerDim[1] // bottom face - + lNodesPerDim[0]*(lNodesPerDim[2] + 1); // front face; + // Two faces received + } else if ((bottomBC == 0) && (frontBC == 0)) { + numReceive = lNodesPerDim[0] * lNodesPerDim[1] // bottom face + + lNodesPerDim[0] * (lNodesPerDim[2] + 1); // front face; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive front-bottom edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - - gNodesPerDim[0] + i; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1] - procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] - gNodesPerDim[0] + i; + receivePIDs[countIDs] = myRank - procsPerDim[0] * 
procsPerDim[1] - procsPerDim[0]; ++countIDs; } // Receive bottom face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - + i - + j*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] + i + j * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1]; ++countIDs; } } // Receive front face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i - + k*(gNodesPerDim[1]*gNodesPerDim[0]); + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i + k * (gNodesPerDim[1] * gNodesPerDim[0]); receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } } - } else if( (bottomBC == 0) && (leftBC == 0) ) { - numReceive = lNodesPerDim[1]*(lNodesPerDim[0] + lNodesPerDim[2] + 1); + } else if ((bottomBC == 0) && (leftBC == 0)) { + numReceive = lNodesPerDim[1] * (lNodesPerDim[0] + lNodesPerDim[2] + 1); receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive left-bottom edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = j*gNodesPerDim[0] - + startGID - gNodesPerDim[1]*gNodesPerDim[0] - 1; - receivePIDs[countIDs] = myRank - procsPerDim[1]*procsPerDim[0] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = j * gNodesPerDim[0] + startGID - gNodesPerDim[1] * gNodesPerDim[0] - 1; + receivePIDs[countIDs] = myRank - procsPerDim[1] * procsPerDim[0] - 1; ++countIDs; } // Receive bottom face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - 
receiveGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID - gNodesPerDim[1]*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID - gNodesPerDim[1] * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Receive left face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + j*gNodesPerDim[0] - + startGID - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID - 1; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - } else if( (frontBC == 0) && (leftBC == 0) ) { - numReceive = lNodesPerDim[2]*(lNodesPerDim[1] + lNodesPerDim[0] + 1); + } else if ((frontBC == 0) && (leftBC == 0)) { + numReceive = lNodesPerDim[2] * (lNodesPerDim[1] + lNodesPerDim[0] + 1); receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive front-left edge nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + startGID - gNodesPerDim[0] - 1; - receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + startGID - gNodesPerDim[0] - 1; + receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; ++countIDs; } // Receive front face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID - gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = k * 
gNodesPerDim[1] * gNodesPerDim[0] + i + startGID - gNodesPerDim[0]; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } } // Receive left face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + j*gNodesPerDim[0] - + startGID - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID - 1; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - // Single face received - } else if(bottomBC == 0) { - numReceive = lNodesPerDim[0]*lNodesPerDim[1]; + // Single face received + } else if (bottomBC == 0) { + numReceive = lNodesPerDim[0] * lNodesPerDim[1]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive bottom face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID - gNodesPerDim[1]*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID - gNodesPerDim[1] * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[1] * procsPerDim[0]; ++countIDs; } } - } else if(frontBC == 0) { - numReceive = lNodesPerDim[0]*lNodesPerDim[2]; + } else if (frontBC == 0) { + numReceive = lNodesPerDim[0] * lNodesPerDim[2]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Receive front face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID - gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = k * 
gNodesPerDim[1] * gNodesPerDim[0] + i + startGID - gNodesPerDim[0]; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } } - } else if(leftBC == 0) { - numReceive = lNodesPerDim[1]*lNodesPerDim[2]; + } else if (leftBC == 0) { + numReceive = lNodesPerDim[1] * lNodesPerDim[2]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); LO countIDs = 0; // Recive left face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + j*gNodesPerDim[0] + startGID - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID - 1; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - } // Sent nodes - if( (topBC == 0) && (backBC == 0) && (rightBC == 0) ) { - numSend = (lNodesPerDim[0])*(lNodesPerDim[1]) - + (lNodesPerDim[0])*(lNodesPerDim[2]) - + (lNodesPerDim[1])*(lNodesPerDim[2]) - + lNodesPerDim[0] - + lNodesPerDim[1] - + lNodesPerDim[2] - + 1; + if ((topBC == 0) && (backBC == 0) && (rightBC == 0)) { + numSend = (lNodesPerDim[0]) * (lNodesPerDim[1]) + (lNodesPerDim[0]) * (lNodesPerDim[2]) + (lNodesPerDim[1]) * (lNodesPerDim[2]) + lNodesPerDim[0] + lNodesPerDim[1] + lNodesPerDim[2] + 1; sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) - + j*gNodesPerDim[0] - + startGID + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < 
lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } // Send nodes of right-back edge - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; ++countIDs; } // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0]; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0]; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Send nodes of top-right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + 1; ++countIDs; } // 
Send nodes of top-back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] - + (lNodesPerDim[0] - 1)*gNodesPerDim[0]; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + procsPerDim[1]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + (lNodesPerDim[0] - 1) * gNodesPerDim[0]; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + procsPerDim[1]; ++countIDs; } // Send node of top-back-right corner - sendGIDs[countIDs] = startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] - + (lNodesPerDim[0] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + procsPerDim[1] + 1; + sendGIDs[countIDs] = startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + (lNodesPerDim[0] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + procsPerDim[1] + 1; ++countIDs; - } else if( (topBC == 0) && (backBC == 0) ) { - numSend = (lNodesPerDim[0]*lNodesPerDim[2]) // back face - + (lNodesPerDim[0]*lNodesPerDim[1]) // Top face - + (lNodesPerDim[0]); // top-back edge + } else if ((topBC == 0) && (backBC == 0)) { + numSend = (lNodesPerDim[0] * lNodesPerDim[2]) // back face + + (lNodesPerDim[0] * lNodesPerDim[1]) // Top face + + (lNodesPerDim[0]); // top-back edge sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + i + startGID + (lNodesPerDim[1] - 1) * 
gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0]; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0]; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Send nodes of top-back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] - + (lNodesPerDim[0] - 1)*gNodesPerDim[0]; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + procsPerDim[1]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + (lNodesPerDim[0] - 1) * gNodesPerDim[0]; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + procsPerDim[1]; ++countIDs; } - } else if( (topBC == 0) && (rightBC == 0) ) { - numSend = (lNodesPerDim[1]*lNodesPerDim[2]) // right face - + (lNodesPerDim[0]*lNodesPerDim[1]) // Top face - + (lNodesPerDim[1]); // top-right edge + } else if ((topBC == 0) && (rightBC == 0)) { + numSend = (lNodesPerDim[1] * lNodesPerDim[2]) // right face + + (lNodesPerDim[0] * lNodesPerDim[1]) // Top face + + (lNodesPerDim[1]); // top-right edge sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) - + j*gNodesPerDim[0] - + startGID + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j 
< lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0]; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0]; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Send nodes of top-right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + 1; ++countIDs; } - } else if( (backBC == 0) && (rightBC == 0) ) { - numSend = lNodesPerDim[2]*(lNodesPerDim[0] + lNodesPerDim[1] + 1); + } else if ((backBC == 0) && (rightBC == 0)) { + numSend = lNodesPerDim[2] * (lNodesPerDim[0] + lNodesPerDim[1] + 1); sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + j*gNodesPerDim[0] - + startGID + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * 
gNodesPerDim[0] + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } // Send nodes of back-right edge - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; - ++countIDs; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; + ++countIDs; } - } else if(topBC == 0) { - numSend = lNodesPerDim[0]*lNodesPerDim[1]; + } else if (topBC == 0) { + numSend = lNodesPerDim[0] * lNodesPerDim[1]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[0]*gNodesPerDim[0]; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[0] * gNodesPerDim[0]; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } - } else if(backBC == 0) { - numSend = 
lNodesPerDim[0]*lNodesPerDim[2]; + } else if (backBC == 0) { + numSend = lNodesPerDim[0] * lNodesPerDim[2]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } - } else if(rightBC == 0) { - numSend = lNodesPerDim[1]*lNodesPerDim[2]; + } else if (rightBC == 0) { + numSend = lNodesPerDim[1] * lNodesPerDim[2]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + j*gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } - } } @@ -946,94 +910,94 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar comm->barrier(); tm = Teuchos::null; -// #ifdef HAVE_MUELU_CUDA -// if(profileSetup) cudaProfilerStart(); -// #endif - -// tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 2 - MueLu Setup"))); -// RCP H; -// RCP Prec; -// A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - -// const std::string userName = "user data"; -// Teuchos::ParameterList& userParamList = paramList.sublist(userName); -// userParamList.set("int numDimensions", numDimensions); -// userParamList.set >("Array 
lNodesPerDim", lNodesPerDim); -// userParamList.set >("Coordinates", coordinates); -// H = MueLu::CreateXpetraPreconditioner(A, paramList, paramList); - -// comm->barrier(); -// tm = Teuchos::null; - -// #ifdef HAVE_MUELU_CUDA -// if(profileSolve) cudaProfilerStop(); -// #endif - -// tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3 - LHS and RHS initialization"))); -// X->putScalar(zero); -// tm = Teuchos::null; - -// #ifdef HAVE_MUELU_BELOS -// tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 5 - Belos Solve"))); -// #ifdef HAVE_MUELU_CUDA -// if(profileSolve) cudaProfilerStart(); -// #endif -// // Operator and Multivector type that will be used with Belos -// typedef MultiVector MV; -// typedef Belos::OperatorT OP; - -// // Define Operator and Preconditioner -// Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator -// Teuchos::RCP belosPrec; // Turns a MueLu::Hierarchy object into a Belos operator -// H->IsPreconditioner(true); -// belosPrec = Teuchos::rcp(new Belos::MueLuOp (H)); // Turns a MueLu::Hierarchy object into a Belos operator - -// // Construct a Belos LinearProblem object -// RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); -// if(solvePreconditioned) belosProblem->setRightPrec(belosPrec); - -// bool set = belosProblem->setProblem(); -// if (set == false) { -// out << "\nERROR: Belos::LinearProblem failed to set up correctly!" 
<< std::endl; -// return EXIT_FAILURE; -// } - -// // Belos parameter list -// Teuchos::ParameterList belosList; -// belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed -// belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested -// belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); -// belosList.set("Output Frequency", 1); -// belosList.set("Output Style", Belos::Brief); -// if (!scaleResidualHist) -// belosList.set("Implicit Residual Scaling", "None"); - -// // Create an iterative solver manager -// RCP< Belos::SolverManager > solver; -// solver = rcp(new Belos::BlockGmresSolMgr(belosProblem, rcp(&belosList, false))); - -// // Perform solve -// Belos::ReturnType retStatus = Belos::Unconverged; -// retStatus = solver->solve(); - -// // Get the number of iterations for this solve. -// out << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; -// // Check convergence -// if (retStatus != Belos::Converged) -// out << std::endl << "ERROR: Belos did not converge! " << std::endl; -// else -// out << std::endl << "SUCCESS: Belos converged!" 
<< std::endl; -// #ifdef HAVE_MUELU_CUDA -// if(profileSolve) cudaProfilerStop(); -// #endif -// #endif //ifdef HAVE_MUELU_BELOS + // #ifdef HAVE_MUELU_CUDA + // if(profileSetup) cudaProfilerStart(); + // #endif + + // tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 2 - MueLu Setup"))); + // RCP H; + // RCP Prec; + // A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + + // const std::string userName = "user data"; + // Teuchos::ParameterList& userParamList = paramList.sublist(userName); + // userParamList.set("int numDimensions", numDimensions); + // userParamList.set >("Array lNodesPerDim", lNodesPerDim); + // userParamList.set >("Coordinates", coordinates); + // H = MueLu::CreateXpetraPreconditioner(A, paramList, paramList); + + // comm->barrier(); + // tm = Teuchos::null; + + // #ifdef HAVE_MUELU_CUDA + // if(profileSolve) cudaProfilerStop(); + // #endif + + // tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3 - LHS and RHS initialization"))); + // X->putScalar(zero); + // tm = Teuchos::null; + + // #ifdef HAVE_MUELU_BELOS + // tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 5 - Belos Solve"))); + // #ifdef HAVE_MUELU_CUDA + // if(profileSolve) cudaProfilerStart(); + // #endif + // // Operator and Multivector type that will be used with Belos + // typedef MultiVector MV; + // typedef Belos::OperatorT OP; + + // // Define Operator and Preconditioner + // Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator + // Teuchos::RCP belosPrec; // Turns a MueLu::Hierarchy object into a Belos operator + // H->IsPreconditioner(true); + // belosPrec = Teuchos::rcp(new Belos::MueLuOp (H)); // Turns a MueLu::Hierarchy object into a Belos operator + + // // Construct a Belos LinearProblem object + // RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); + // if(solvePreconditioned) belosProblem->setRightPrec(belosPrec); + + // bool set = 
belosProblem->setProblem(); + // if (set == false) { + // out << "\nERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; + // return EXIT_FAILURE; + // } + + // // Belos parameter list + // Teuchos::ParameterList belosList; + // belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + // belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + // belosList.set("Output Frequency", 1); + // belosList.set("Output Style", Belos::Brief); + // if (!scaleResidualHist) + // belosList.set("Implicit Residual Scaling", "None"); + + // // Create an iterative solver manager + // RCP< Belos::SolverManager > solver; + // solver = rcp(new Belos::BlockGmresSolMgr(belosProblem, rcp(&belosList, false))); + + // // Perform solve + // Belos::ReturnType retStatus = Belos::Unconverged; + // retStatus = solver->solve(); + + // // Get the number of iterations for this solve. + // out << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; + // // Check convergence + // if (retStatus != Belos::Converged) + // out << std::endl << "ERROR: Belos did not converge! " << std::endl; + // else + // out << std::endl << "SUCCESS: Belos converged!" 
<< std::endl; + // #ifdef HAVE_MUELU_CUDA + // if(profileSolve) cudaProfilerStop(); + // #endif + // #endif //ifdef HAVE_MUELU_BELOS comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; RCP reportParams = rcp(new ParameterList); - const std::string filter = ""; + const std::string filter = ""; std::ios_base::fmtflags ff(out.flags()); TimeMonitor::report(comm.ptr(), out, filter, reportParams); out << std::setiosflags(ff); @@ -1048,5 +1012,5 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/research/max/AdditiveMG/ADRProblemFactory.hpp b/packages/muelu/research/max/AdditiveMG/ADRProblemFactory.hpp index 21a7ce6c8195..3574d40dba79 100644 --- a/packages/muelu/research/max/AdditiveMG/ADRProblemFactory.hpp +++ b/packages/muelu/research/max/AdditiveMG/ADRProblemFactory.hpp @@ -59,30 +59,33 @@ namespace ADR { - namespace Xpetra { +namespace Xpetra { - using Teuchos::RCP; +using Teuchos::RCP; - template - RCP > BuildProblem(const std::string &MatrixType, const RCP& map, Teuchos::ParameterList& list) { - RCP > P; +template +RCP > BuildProblem(const std::string& MatrixType, const RCP& map, Teuchos::ParameterList& list) { + RCP > P; - if (MatrixType == "ADR1D") P.reset(new ADR1DProblem (list, map)); - else if (MatrixType == "ADR2D") P.reset(new ADR2DProblem (list, map)); - else if (MatrixType == "ADR3D") P.reset(new ADR3DProblem (list, map)); - - else - TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, - "`MatrixType' has incorrect value (" << MatrixType << ") in input to function CreateCrsMatrix()." 
- << "Check the documentation for a list of valid choices"); + if (MatrixType == "ADR1D") + P.reset(new ADR1DProblem(list, map)); + else if (MatrixType == "ADR2D") + P.reset(new ADR2DProblem(list, map)); + else if (MatrixType == "ADR3D") + P.reset(new ADR3DProblem(list, map)); - P->setObjectLabel(MatrixType); + else + TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, + "`MatrixType' has incorrect value (" << MatrixType << ") in input to function CreateCrsMatrix()." + << "Check the documentation for a list of valid choices"); - return P; - } + P->setObjectLabel(MatrixType); - } // namespace Xpetra + return P; +} -} // namespace ADR +} // namespace Xpetra -#endif //ifndef ADRPROBLEMFACTORY_HPP +} // namespace ADR + +#endif // ifndef ADRPROBLEMFACTORY_HPP diff --git a/packages/muelu/research/max/AdditiveMG/ADR_Problem.hpp b/packages/muelu/research/max/AdditiveMG/ADR_Problem.hpp index 3c41e11eafd7..731ca4cad57c 100644 --- a/packages/muelu/research/max/AdditiveMG/ADR_Problem.hpp +++ b/packages/muelu/research/max/AdditiveMG/ADR_Problem.hpp @@ -7,71 +7,73 @@ namespace ADR { - namespace Xpetra { +namespace Xpetra { - enum { - DIR_LEFT = 0x01, - DIR_RIGHT = 0x02, - DIR_BOTTOM = 0x04, - DIR_TOP = 0x08, - DIR_FRONT = 0x10, - DIR_BACK = 0x20, - DIR_ALL = DIR_LEFT | DIR_RIGHT | DIR_BOTTOM | DIR_TOP | DIR_FRONT | DIR_BACK - }; - typedef size_t DirBC; +enum { + DIR_LEFT = 0x01, + DIR_RIGHT = 0x02, + DIR_BOTTOM = 0x04, + DIR_TOP = 0x08, + DIR_FRONT = 0x10, + DIR_BACK = 0x20, + DIR_ALL = DIR_LEFT | DIR_RIGHT | DIR_BOTTOM | DIR_TOP | DIR_FRONT | DIR_BACK +}; +typedef size_t DirBC; - template - class Problem : public Teuchos::Describable { - public: - Problem(Teuchos::ParameterList& list) : list_(list) { - SetBoundary(); - }; - Problem(Teuchos::ParameterList& list, const Teuchos::RCP& map) : list_(list) { - Map_ = map; - SetBoundary(); - }; - virtual ~Problem() { } +template +class Problem : public Teuchos::Describable { + public: + Problem(Teuchos::ParameterList& list) + : 
list_(list) { + SetBoundary(); + }; + Problem(Teuchos::ParameterList& list, const Teuchos::RCP& map) + : list_(list) { + Map_ = map; + SetBoundary(); + }; + virtual ~Problem() {} - virtual Teuchos::RCP BuildMatrix() = 0; - virtual Teuchos::RCP BuildCoords() { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Coordinates construction is not implemented for this problem"); - } - virtual Teuchos::RCP BuildNullspace() { - TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Nullspace construction is not implemented for this problem"); - } + virtual Teuchos::RCP BuildMatrix() = 0; + virtual Teuchos::RCP BuildCoords() { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Coordinates construction is not implemented for this problem"); + } + virtual Teuchos::RCP BuildNullspace() { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Nullspace construction is not implemented for this problem"); + } - // Get methods - Teuchos::RCP getMap() const { return Map_; } - Teuchos::RCP getMatrix() const { return A_; } - Teuchos::RCP getNullspace() const { return Nullspace_; } - Teuchos::RCP getCoords() const { return Coords_; } + // Get methods + Teuchos::RCP getMap() const { return Map_; } + Teuchos::RCP getMatrix() const { return A_; } + Teuchos::RCP getNullspace() const { return Nullspace_; } + Teuchos::RCP getCoords() const { return Coords_; } - // Set methods - Teuchos::RCP setMap(const Teuchos::RCP& map) { Map_ = map; } + // Set methods + Teuchos::RCP setMap(const Teuchos::RCP& map) { Map_ = map; } - protected: - Teuchos::ParameterList& list_; - Teuchos::RCP Map_; - Teuchos::RCP A_; - Teuchos::RCP Nullspace_; - Teuchos::RCP Coords_; + protected: + Teuchos::ParameterList& list_; + Teuchos::RCP Map_; + Teuchos::RCP A_; + Teuchos::RCP Nullspace_; + Teuchos::RCP Coords_; - DirBC DirichletBC_; + DirBC DirichletBC_; - private: - void SetBoundary() { - DirichletBC_ = DIR_ALL; - if (this->list_.get("left boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_LEFT; - if 
(this->list_.get("right boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_RIGHT; - if (this->list_.get("bottom boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_BOTTOM; - if (this->list_.get("top boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_TOP; - if (this->list_.get("front boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_FRONT; - if (this->list_.get("back boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_BACK; - } - }; + private: + void SetBoundary() { + DirichletBC_ = DIR_ALL; + if (this->list_.get("left boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_LEFT; + if (this->list_.get("right boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_RIGHT; + if (this->list_.get("bottom boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_BOTTOM; + if (this->list_.get("top boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_TOP; + if (this->list_.get("front boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_FRONT; + if (this->list_.get("back boundary", "Dirichlet") == "Neumann") DirichletBC_ ^= DIR_BACK; + } +}; - } // namespace Xpetra +} // namespace Xpetra -} // namespace ADR +} // namespace ADR -#endif // ADR_PROBLEM_HPP +#endif // ADR_PROBLEM_HPP diff --git a/packages/muelu/research/max/AdditiveMG/ADR_XpetraParameters.hpp b/packages/muelu/research/max/AdditiveMG/ADR_XpetraParameters.hpp index 66705fc2055c..98c3ca2e7595 100644 --- a/packages/muelu/research/max/AdditiveMG/ADR_XpetraParameters.hpp +++ b/packages/muelu/research/max/AdditiveMG/ADR_XpetraParameters.hpp @@ -13,183 +13,203 @@ namespace ADR { - namespace Xpetra { - - // TODO nx/ny/nz == GO or global_size_t ? But what is the best to do? 
- - template - class Parameters : public Teuchos::VerboseObject >, public Teuchos::Describable { - public: - - Parameters(Teuchos::CommandLineProcessor& clp, GO nx = 16, GO ny = -1, GO nz = -1, const std::string& matrixType = "ADR1D", - int keepBCs = 0, double stretchx = 1.0, double stretchy = 1.0, double stretchz = 1.0, double h = 1.0, double delta = 0.0, - int PMLXL = 0, int PMLXR = 0, int PMLYL = 0, int PMLYR = 0, int PMLZL = 0, int PMLZR = 0, - double omega = 2.0*M_PI, double shift = 0.5, GO mx = -1, GO my = -1, GO mz = -1, int model = 0) - : nx_(nx), ny_(ny), nz_(nz), mx_(mx), my_(my), mz_(mz), stretchx_(stretchx), stretchy_(stretchy), stretchz_(stretchz), matrixType_(matrixType), keepBCs_(keepBCs), - h_(h), delta_(delta), PMLx_left(PMLXL), PMLx_right(PMLXR), PMLy_left(PMLYL), PMLy_right(PMLYR), PMLz_left(PMLZL), PMLz_right(PMLZR), - omega_(omega), shift_(shift), model_(model) { - clp.setOption("nx", &nx_, "mesh points in x-direction."); - clp.setOption("ny", &ny_, "mesh points in y-direction."); - clp.setOption("nz", &nz_, "mesh points in z-direction."); - clp.setOption("mx", &mx_, "processors in x-direction."); - clp.setOption("my", &my_, "processors in y-direction."); - clp.setOption("mz", &mz_, "processors in z-direction."); - clp.setOption("stretchx", &stretchx_, "stretch mesh in x-direction."); - clp.setOption("stretchy", &stretchy_, "stretch mesh in y-direction."); - clp.setOption("stretchz", &stretchz_, "stretch mesh in z-direction."); - clp.setOption("keepBCs", &keepBCs_, "keep Dirichlet boundary rows in matrix (0=false,1=true)"); - clp.setOption("matrixType", &matrixType_, "matrix type: Laplace1D, Laplace2D, Laplace3D, ..."); //TODO: Star2D, numGlobalElements=... 
- clp.setOption("h", &h_, "mesh width for uniform h"); - clp.setOption("delta", &delta_, "maximum PML damping value"); - clp.setOption("PMLx_left", &PMLx_left, "PML grid points in x-direction (left boundary)"); - clp.setOption("PMLx_right", &PMLx_right, "PML grid points in x-direction (right boundary)"); - clp.setOption("PMLy_left", &PMLy_left, "PML grid points in y-direction (left boundary)"); - clp.setOption("PMLy_right", &PMLy_right, "PML grid points in y-direction (right boundary)"); - clp.setOption("PMLz_left", &PMLz_left, "PML grid points in z-direction (left boundary)"); - clp.setOption("PMLz_right", &PMLz_right, "PML grid points in z-direction (right boundary)"); - clp.setOption("omega", &omega_, "angular frequency omega"); - clp.setOption("shift", &shift_, "complex frequency shift"); - clp.setOption("mx", &mx_, "processors in x-direction."); - clp.setOption("my", &my_, "processors in y-direction."); - clp.setOption("mz", &mz_, "processors in z-direction."); - clp.setOption("model", &model_, "velocity model"); - } - - GO GetNumGlobalElements() const { - const Teuchos::ParameterList& pL = GetParameterList(); - - const std::string& matrixType = pL.get("matrixType"); - const GO nx = pL.get("nx"); - const GO ny = pL.get("ny"); - const GO nz = pL.get("nz"); - - GO numGlobalElements = -1; - if (matrixType == "ADR1D") - numGlobalElements = nx; - - else if (matrixType == "ADR2D") - numGlobalElements = nx*ny; - - else if (matrixType == "ADR3D") - numGlobalElements = nx*ny*nz; - - TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements < 0, std::runtime_error, - "Gallery: numGlobalElements < 0 (did you forget --ny (or --nz) for 2D (3D) problems?)"); - - return numGlobalElements; - } - - const std::string& GetMatrixType() const { - const Teuchos::ParameterList& paramList = GetParameterList(); - return paramList.get("matrixType"); - } - - void check() const { } - - Teuchos::ParameterList& GetParameterList() const { - if (!paramList_.is_null()) - return *paramList_; - - paramList_ 
= rcp(new Teuchos::ParameterList()); - - paramList_->set("nx", nx_); - paramList_->set("ny", ny_); - paramList_->set("nz", nz_); - paramList_->set("mx", mx_); - paramList_->set("my", my_); - paramList_->set("mz", mz_); - paramList_->set("model", model_); - paramList_->set("stretchx", stretchx_); - paramList_->set("stretchy", stretchy_); - paramList_->set("stretchz", stretchz_); - paramList_->set("keepBCs", static_cast(keepBCs_)); - paramList_->set("matrixType", matrixType_); - paramList_->set("h", h_); - paramList_->set("delta", delta_); - paramList_->set("PMLx_left", PMLx_left); - paramList_->set("PMLx_right", PMLx_right); - paramList_->set("PMLy_left", PMLy_left); - paramList_->set("PMLy_right", PMLy_right); - paramList_->set("PMLz_left", PMLz_left); - paramList_->set("PMLz_right", PMLz_right); - paramList_->set("omega", omega_); - paramList_->set("shift", shift_); - - check(); - - return *paramList_; - } - - //! @name Overridden from Teuchos::Describable - //@{ - - //! Return a simple one-line description of this object. - std::string description() const { - std::ostringstream out; - out << Teuchos::Describable::description(); - out << "{type = " << GetMatrixType() << ", size = " << GetNumGlobalElements() << "} "; - - return out.str(); - } - - //! Print the object with some verbosity level to an FancyOStream object. - void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const { - int vl = (verbLevel == Teuchos::VERB_DEFAULT) ? 
Teuchos::VERB_LOW : verbLevel; - if (vl == Teuchos::VERB_NONE) - return; - - if (vl == Teuchos::VERB_LOW) - out << description() << std::endl; - else - out << Teuchos::Describable::description() << std::endl; - - if (vl == Teuchos:: VERB_MEDIUM || vl == Teuchos::VERB_HIGH || vl == Teuchos::VERB_EXTREME) { - Teuchos::OSTab tab1(out); - - const Teuchos::ParameterList& paramList = GetParameterList(); - std::string matrixType = paramList.get("matrixType"); - GO nx = paramList.get("nx"); - GO ny = paramList.get("ny"); - GO nz = paramList.get("nz"); - - out << "Matrix type: " << matrixType << std::endl - << "Problem size: " << GetNumGlobalElements(); - - if (matrixType == "Laplace2D" || matrixType == "Elasticity2D" || matrixType == "Helmholtz2D") out << " (" << nx << "x" << ny << ")"; - else if (matrixType == "Laplace3D" || matrixType == "Elasticity3D" || matrixType == "Helmholtz3D") out << " (" << nx << "x" << ny << "x" << nz << ")"; - - out << std::endl; - } - } - - //@} - - private: - // See Teuchos BUG 5249: https://software.sandia.gov/bugzilla/show_bug.cgi?id=5249 - mutable GO nx_, ny_, nz_; - mutable GO mx_, my_, mz_; - mutable double stretchx_, stretchy_, stretchz_; - - std::string matrixType_; - - mutable int keepBCs_; - - mutable double h_; - mutable double delta_; - mutable int PMLx_left, PMLx_right; - mutable int PMLy_left, PMLy_right; - mutable int PMLz_left, PMLz_right; - mutable double omega_; - mutable double shift_; - mutable int model_; - - // There is a major assumption here: - // As soon as somebody call GetParameterList(), we freeze all other variables into the list, - // and ignore them. This allows us to make the modification of the list from outside. - mutable Teuchos::RCP paramList_; - }; +namespace Xpetra { + +// TODO nx/ny/nz == GO or global_size_t ? But what is the best to do? 
+ +template +class Parameters : public Teuchos::VerboseObject >, public Teuchos::Describable { + public: + Parameters(Teuchos::CommandLineProcessor& clp, GO nx = 16, GO ny = -1, GO nz = -1, const std::string& matrixType = "ADR1D", + int keepBCs = 0, double stretchx = 1.0, double stretchy = 1.0, double stretchz = 1.0, double h = 1.0, double delta = 0.0, + int PMLXL = 0, int PMLXR = 0, int PMLYL = 0, int PMLYR = 0, int PMLZL = 0, int PMLZR = 0, + double omega = 2.0 * M_PI, double shift = 0.5, GO mx = -1, GO my = -1, GO mz = -1, int model = 0) + : nx_(nx) + , ny_(ny) + , nz_(nz) + , mx_(mx) + , my_(my) + , mz_(mz) + , stretchx_(stretchx) + , stretchy_(stretchy) + , stretchz_(stretchz) + , matrixType_(matrixType) + , keepBCs_(keepBCs) + , h_(h) + , delta_(delta) + , PMLx_left(PMLXL) + , PMLx_right(PMLXR) + , PMLy_left(PMLYL) + , PMLy_right(PMLYR) + , PMLz_left(PMLZL) + , PMLz_right(PMLZR) + , omega_(omega) + , shift_(shift) + , model_(model) { + clp.setOption("nx", &nx_, "mesh points in x-direction."); + clp.setOption("ny", &ny_, "mesh points in y-direction."); + clp.setOption("nz", &nz_, "mesh points in z-direction."); + clp.setOption("mx", &mx_, "processors in x-direction."); + clp.setOption("my", &my_, "processors in y-direction."); + clp.setOption("mz", &mz_, "processors in z-direction."); + clp.setOption("stretchx", &stretchx_, "stretch mesh in x-direction."); + clp.setOption("stretchy", &stretchy_, "stretch mesh in y-direction."); + clp.setOption("stretchz", &stretchz_, "stretch mesh in z-direction."); + clp.setOption("keepBCs", &keepBCs_, "keep Dirichlet boundary rows in matrix (0=false,1=true)"); + clp.setOption("matrixType", &matrixType_, "matrix type: Laplace1D, Laplace2D, Laplace3D, ..."); // TODO: Star2D, numGlobalElements=... 
+ clp.setOption("h", &h_, "mesh width for uniform h"); + clp.setOption("delta", &delta_, "maximum PML damping value"); + clp.setOption("PMLx_left", &PMLx_left, "PML grid points in x-direction (left boundary)"); + clp.setOption("PMLx_right", &PMLx_right, "PML grid points in x-direction (right boundary)"); + clp.setOption("PMLy_left", &PMLy_left, "PML grid points in y-direction (left boundary)"); + clp.setOption("PMLy_right", &PMLy_right, "PML grid points in y-direction (right boundary)"); + clp.setOption("PMLz_left", &PMLz_left, "PML grid points in z-direction (left boundary)"); + clp.setOption("PMLz_right", &PMLz_right, "PML grid points in z-direction (right boundary)"); + clp.setOption("omega", &omega_, "angular frequency omega"); + clp.setOption("shift", &shift_, "complex frequency shift"); + clp.setOption("mx", &mx_, "processors in x-direction."); + clp.setOption("my", &my_, "processors in y-direction."); + clp.setOption("mz", &mz_, "processors in z-direction."); + clp.setOption("model", &model_, "velocity model"); + } + + GO GetNumGlobalElements() const { + const Teuchos::ParameterList& pL = GetParameterList(); + + const std::string& matrixType = pL.get("matrixType"); + const GO nx = pL.get("nx"); + const GO ny = pL.get("ny"); + const GO nz = pL.get("nz"); + + GO numGlobalElements = -1; + if (matrixType == "ADR1D") + numGlobalElements = nx; + + else if (matrixType == "ADR2D") + numGlobalElements = nx * ny; + + else if (matrixType == "ADR3D") + numGlobalElements = nx * ny * nz; + + TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements < 0, std::runtime_error, + "Gallery: numGlobalElements < 0 (did you forget --ny (or --nz) for 2D (3D) problems?)"); + + return numGlobalElements; + } + + const std::string& GetMatrixType() const { + const Teuchos::ParameterList& paramList = GetParameterList(); + return paramList.get("matrixType"); + } + + void check() const {} + + Teuchos::ParameterList& GetParameterList() const { + if (!paramList_.is_null()) + return *paramList_; + + 
paramList_ = rcp(new Teuchos::ParameterList()); + + paramList_->set("nx", nx_); + paramList_->set("ny", ny_); + paramList_->set("nz", nz_); + paramList_->set("mx", mx_); + paramList_->set("my", my_); + paramList_->set("mz", mz_); + paramList_->set("model", model_); + paramList_->set("stretchx", stretchx_); + paramList_->set("stretchy", stretchy_); + paramList_->set("stretchz", stretchz_); + paramList_->set("keepBCs", static_cast(keepBCs_)); + paramList_->set("matrixType", matrixType_); + paramList_->set("h", h_); + paramList_->set("delta", delta_); + paramList_->set("PMLx_left", PMLx_left); + paramList_->set("PMLx_right", PMLx_right); + paramList_->set("PMLy_left", PMLy_left); + paramList_->set("PMLy_right", PMLy_right); + paramList_->set("PMLz_left", PMLz_left); + paramList_->set("PMLz_right", PMLz_right); + paramList_->set("omega", omega_); + paramList_->set("shift", shift_); + + check(); + + return *paramList_; + } + + //! @name Overridden from Teuchos::Describable + //@{ + + //! Return a simple one-line description of this object. + std::string description() const { + std::ostringstream out; + out << Teuchos::Describable::description(); + out << "{type = " << GetMatrixType() << ", size = " << GetNumGlobalElements() << "} "; + return out.str(); } -} + + //! Print the object with some verbosity level to an FancyOStream object. + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const { + int vl = (verbLevel == Teuchos::VERB_DEFAULT) ? 
Teuchos::VERB_LOW : verbLevel; + if (vl == Teuchos::VERB_NONE) + return; + + if (vl == Teuchos::VERB_LOW) + out << description() << std::endl; + else + out << Teuchos::Describable::description() << std::endl; + + if (vl == Teuchos::VERB_MEDIUM || vl == Teuchos::VERB_HIGH || vl == Teuchos::VERB_EXTREME) { + Teuchos::OSTab tab1(out); + + const Teuchos::ParameterList& paramList = GetParameterList(); + std::string matrixType = paramList.get("matrixType"); + GO nx = paramList.get("nx"); + GO ny = paramList.get("ny"); + GO nz = paramList.get("nz"); + + out << "Matrix type: " << matrixType << std::endl + << "Problem size: " << GetNumGlobalElements(); + + if (matrixType == "Laplace2D" || matrixType == "Elasticity2D" || matrixType == "Helmholtz2D") + out << " (" << nx << "x" << ny << ")"; + else if (matrixType == "Laplace3D" || matrixType == "Elasticity3D" || matrixType == "Helmholtz3D") + out << " (" << nx << "x" << ny << "x" << nz << ")"; + + out << std::endl; + } + } + + //@} + + private: + // See Teuchos BUG 5249: https://software.sandia.gov/bugzilla/show_bug.cgi?id=5249 + mutable GO nx_, ny_, nz_; + mutable GO mx_, my_, mz_; + mutable double stretchx_, stretchy_, stretchz_; + + std::string matrixType_; + + mutable int keepBCs_; + + mutable double h_; + mutable double delta_; + mutable int PMLx_left, PMLx_right; + mutable int PMLy_left, PMLy_right; + mutable int PMLz_left, PMLz_right; + mutable double omega_; + mutable double shift_; + mutable int model_; + + // There is a major assumption here: + // As soon as somebody call GetParameterList(), we freeze all other variables into the list, + // and ignore them. This allows us to make the modification of the list from outside. 
+ mutable Teuchos::RCP paramList_; +}; + +} // namespace Xpetra +} // namespace ADR #endif diff --git a/packages/muelu/research/max/AdditiveMG/BAP.hpp b/packages/muelu/research/max/AdditiveMG/BAP.hpp index 72344805ce6d..62faea3a20de 100644 --- a/packages/muelu/research/max/AdditiveMG/BAP.hpp +++ b/packages/muelu/research/max/AdditiveMG/BAP.hpp @@ -3,250 +3,222 @@ #include "neighbours.hpp" - -typedef Tpetra::CrsMatrix tpetra_matrix_type; -typedef Tpetra::MultiVector::magnitudeType,int,int,KokkosClassic::DefaultNode::DefaultNodeType> tpetra_multivector_type; -typedef typename Teuchos::ArrayView::const_iterator iterator_type; +typedef Tpetra::CrsMatrix tpetra_matrix_type; +typedef Tpetra::MultiVector::magnitudeType, int, int, KokkosClassic::DefaultNode::DefaultNodeType> tpetra_multivector_type; +typedef typename Teuchos::ArrayView::const_iterator iterator_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type2; -void BAP1D(Teuchos::RCP BAP, Teuchos::RCP tpetra_prolong, Teuchos::RCP BAP_shrunk, Teuchos::RCP< const Teuchos::Comm > comm) -{ - - //INPUT: BAP = matrix where the uncompressed version of B_DD * A_h * Ptentative must be stored - //INPUT: tpetra_prolong = Ptentative - //INPUT: BAP_shrunk = Tpetra:MultiVector contatining the shrunk version of B_DD * A_h * Ptentative resulting from domain decomposition with coloring - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - - Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); - int mypid = comm->getRank(); - - for(int color = 0; color<3; ++color) - { - Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); - - for (iterator_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) - { - const int i_local = *it; - const int aux = BAP->getRowMap()->getLocalElement (i_local); - - std::vector BAP_inds; - std::vector BAP_vals; - - int aux2; - - if( (mypid-1)%3==color && (mypid-1)>= 0 && (mypid-1)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement 
(mypid-1); - else if( (mypid-2)%3==color && (mypid-2)>= 0 && (mypid-2)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid-2); - else if( (mypid)%3==color && (mypid)>= 0 && (mypid)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid); - - if(aux2>=0) - { - BAP_inds.emplace_back(aux2); - BAP_vals.emplace_back(localBAP[aux]); - BAP->insertLocalValues(aux, BAP_inds, BAP_vals); - } - } - - } +void BAP1D(Teuchos::RCP BAP, Teuchos::RCP tpetra_prolong, Teuchos::RCP BAP_shrunk, Teuchos::RCP > comm) { + // INPUT: BAP = matrix where the uncompressed version of B_DD * A_h * Ptentative must be stored + // INPUT: tpetra_prolong = Ptentative + // INPUT: BAP_shrunk = Tpetra:MultiVector contatining the shrunk version of B_DD * A_h * Ptentative resulting from domain decomposition with coloring + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) + Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); + int mypid = comm->getRank(); + + for (int color = 0; color < 3; ++color) { + Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); + + for (iterator_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) { + const int i_local = *it; + const int aux = BAP->getRowMap()->getLocalElement(i_local); + + std::vector BAP_inds; + std::vector BAP_vals; + + int aux2; + + if ((mypid - 1) % 3 == color && (mypid - 1) >= 0 && (mypid - 1) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid - 1); + else if ((mypid - 2) % 3 == color && (mypid - 2) >= 0 && (mypid - 2) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid - 2); + else if ((mypid) % 3 == color && (mypid) >= 0 && (mypid) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid); + + if (aux2 >= 0) { + BAP_inds.emplace_back(aux2); + BAP_vals.emplace_back(localBAP[aux]); + BAP->insertLocalValues(aux, BAP_inds, BAP_vals); + } + } + } } +void BAP2D(Teuchos::RCP BAP, 
Teuchos::RCP tpetra_prolong, Teuchos::RCP BAP_shrunk, Teuchos::RCP > comm, int ndx) { + // INPUT: BAP = matrix where the uncompressed version of B_DD * A_h * Ptentative must be stored + // INPUT: tpetra_prolong = Ptentative + // INPUT: BAP_shrunk = Tpetra:MultiVector contatining the shrunk version of B_DD * A_h * Ptentative resulting from domain decomposition with coloring + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) + // INPUT: ndx = number of domains along x-direction -void BAP2D(Teuchos::RCP BAP, Teuchos::RCP tpetra_prolong, Teuchos::RCP BAP_shrunk, Teuchos::RCP< const Teuchos::Comm > comm, int ndx) -{ - - //INPUT: BAP = matrix where the uncompressed version of B_DD * A_h * Ptentative must be stored - //INPUT: tpetra_prolong = Ptentative - //INPUT: BAP_shrunk = Tpetra:MultiVector contatining the shrunk version of B_DD * A_h * Ptentative resulting from domain decomposition with coloring - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - //INPUT: ndx = number of domains along x-direction - - Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); - int mypid = comm->getRank(); - int brick_id = mypid; - int shifted_id = brick_id-1; + Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); + int mypid = comm->getRank(); + int brick_id = mypid; + int shifted_id = brick_id - 1; - if(mypid>0) - { - for(int color = 0; color<9; ++color) - { - int neighbour = -1; - Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); + if (mypid > 0) { + for (int color = 0; color < 9; ++color) { + int neighbour = -1; + Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); - //The following if statements control the neighbours of a subdomain in a 2D brick partitioned mesh - if( coloring2D( brick_id, ndx )==color && (shifted_id)>= 0 && (shifted_id)getGlobalNumCols() ) - neighbour = (shifted_id); + // The following if statements control the neighbours of a subdomain in a 2D brick partitioned mesh + if (coloring2D(brick_id, ndx) == 
color && (shifted_id) >= 0 && (shifted_id) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id); - else if( coloring2D( brick_id-1, ndx )==color && (shifted_id-1)>= 0 && (shifted_id-1)getGlobalNumCols() ) - neighbour = (shifted_id-1); + else if (coloring2D(brick_id - 1, ndx) == color && (shifted_id - 1) >= 0 && (shifted_id - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1); - else if( coloring2D( brick_id+1, ndx )==color && (shifted_id+1)>= 0 && (shifted_id+1)getGlobalNumCols() ) - neighbour = (shifted_id+1); + else if (coloring2D(brick_id + 1, ndx) == color && (shifted_id + 1) >= 0 && (shifted_id + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1); - else if( coloring2D( brick_id-ndx, ndx )==color && (shifted_id-ndx)>= 0 && (shifted_id-ndx)getGlobalNumCols() ) - neighbour = (shifted_id-ndx); + else if (coloring2D(brick_id - ndx, ndx) == color && (shifted_id - ndx) >= 0 && (shifted_id - ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx); - else if( coloring2D( brick_id-ndx-1, ndx )==color && (shifted_id-ndx-1)>= 0 && (shifted_id-ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1); + else if (coloring2D(brick_id - ndx - 1, ndx) == color && (shifted_id - ndx - 1) >= 0 && (shifted_id - ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1); - else if( coloring2D( brick_id-ndx+1, ndx )==color && (shifted_id-ndx+1)>= 0 && (shifted_id-ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1); + else if (coloring2D(brick_id - ndx + 1, ndx) == color && (shifted_id - ndx + 1) >= 0 && (shifted_id - ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1); - else if( coloring2D( brick_id+ndx, ndx )==color && (shifted_id+ndx)>= 0 && (shifted_id+ndx)getGlobalNumCols() ) - neighbour = (shifted_id+ndx); + else if (coloring2D(brick_id + ndx, ndx) == color && (shifted_id + ndx) >= 0 && (shifted_id + ndx) < 
tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx); - else if( coloring2D( brick_id+ndx-1, ndx )==color && (shifted_id+ndx-1)>= 0 && (shifted_id+ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1); + else if (coloring2D(brick_id + ndx - 1, ndx) == color && (shifted_id + ndx - 1) >= 0 && (shifted_id + ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1); - else if( coloring2D( brick_id+ndx+1, ndx )==color && (shifted_id+ndx+1)>= 0 && (shifted_id+ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1); + else if (coloring2D(brick_id + ndx + 1, ndx) == color && (shifted_id + ndx + 1) >= 0 && (shifted_id + ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1); - //in case neighbour>=0, it means that the current MPI processor (=subdomain) has a neighbour with the color analyzed at the current for-loop iteration - //otherwise the current MPI processor sits on the boundary of the mesh and it has less than 8 neighbours. - if(neighbour>=0) - { - for (iterator_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) - { - const int i_local = *it; - const int aux = BAP->getRowMap()->getLocalElement (i_local); + // in case neighbour>=0, it means that the current MPI processor (=subdomain) has a neighbour with the color analyzed at the current for-loop iteration + // otherwise the current MPI processor sits on the boundary of the mesh and it has less than 8 neighbours. 
+ if (neighbour >= 0) { + for (iterator_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) { + const int i_local = *it; + const int aux = BAP->getRowMap()->getLocalElement(i_local); - std::vector BAP_inds; - std::vector BAP_vals; + std::vector BAP_inds; + std::vector BAP_vals; - int aux2; + int aux2; - aux2 = BAP->getColMap()->getLocalElement (neighbour); - - if(aux2>=0) - { - BAP_inds.emplace_back(aux2); - BAP_vals.emplace_back(localBAP[aux]); - BAP->insertLocalValues(aux, BAP_inds, BAP_vals); - } - } - } + aux2 = BAP->getColMap()->getLocalElement(neighbour); - } - } + if (aux2 >= 0) { + BAP_inds.emplace_back(aux2); + BAP_vals.emplace_back(localBAP[aux]); + BAP->insertLocalValues(aux, BAP_inds, BAP_vals); + } + } + } + } + } } -void BAP3D(Teuchos::RCP BAP, Teuchos::RCP tpetra_prolong, Teuchos::RCP BAP_shrunk, Teuchos::RCP< const Teuchos::Comm > comm, int ndx, int ndy) -{ - - //INPUT: BAP = matrix where the uncompressed version of B_DD * A_h * Ptentative must be stored - //INPUT: tpetra_prolong = Ptentative - //INPUT: BAP_shrunk = Tpetra:MultiVector contatining the shrunk version of B_DD * A_h * Ptentative resulting from domain decomposition with coloring - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - //INPUT: ndx = number of domains along x-direction - //INPUT: ndy = number of domains along y-direction +void BAP3D(Teuchos::RCP BAP, Teuchos::RCP tpetra_prolong, Teuchos::RCP BAP_shrunk, Teuchos::RCP > comm, int ndx, int ndy) { + // INPUT: BAP = matrix where the uncompressed version of B_DD * A_h * Ptentative must be stored + // INPUT: tpetra_prolong = Ptentative + // INPUT: BAP_shrunk = Tpetra:MultiVector contatining the shrunk version of B_DD * A_h * Ptentative resulting from domain decomposition with coloring + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) + // INPUT: ndx = number of domains along x-direction + // INPUT: ndy = number of domains along y-direction Teuchos::ArrayView myLocalElements = 
BAP->getRowMap()->getLocalElementList(); - int mypid = comm->getRank(); - int brick_id = mypid; - int shifted_id = brick_id - 1; - - if(mypid>0) - { - for(int color = 0; color<27; ++color) - { - int neighbour = -1; - Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); - - //The following if statements control the neighbours of a subdomain in a 3D brick partitioned mesh - //Each subdomains is incorporated in a 3x3x3 cube which is sliced into 3 squares living on three different planes - //The neighbours of a subdomain are checked plane by plane: in total there are three planes to span - // - //Identification of neighbours that are on the current plane - if( coloring3D( brick_id, ndx, ndy )==color && (shifted_id)>= 0 && (shifted_id)getGlobalNumCols() ) - neighbour = (shifted_id); - else if( coloring3D( brick_id-1, ndx, ndy )==color && (shifted_id-1)>= 0 && (shifted_id-1)getGlobalNumCols() ) - neighbour = (shifted_id-1); - else if( coloring3D( brick_id+1, ndx, ndy )==color && (shifted_id+1)>= 0 && (shifted_id+1)getGlobalNumCols() ) - neighbour = (shifted_id+1); - else if( coloring3D( brick_id-ndx, ndx, ndy )==color && (shifted_id-ndx)>= 0 && (shifted_id-ndx)getGlobalNumCols() ) - neighbour = (shifted_id-ndx); - else if( coloring3D( brick_id-ndx-1, ndx, ndy )==color && (shifted_id-ndx-1)>= 0 && (shifted_id-ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1); - else if( coloring3D( brick_id-ndx+1, ndx, ndy )==color && (shifted_id-ndx+1)>= 0 && (shifted_id-ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1); - else if( coloring3D( brick_id+ndx, ndx, ndy )==color && (shifted_id+ndx)>= 0 && (shifted_id+ndx)getGlobalNumCols() ) - neighbour = (shifted_id+ndx); - else if( coloring3D( brick_id+ndx-1, ndx, ndy )==color && (shifted_id+ndx-1)>= 0 && (shifted_id+ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1); - else if( coloring3D( brick_id+ndx+1, ndx, ndy )==color && (shifted_id+ndx+1)>= 0 && (shifted_id+ndx+1)getGlobalNumCols() ) - neighbour = 
(shifted_id+ndx+1); - - //Identification of the neighbours that are on the plane below - else if( coloring3D( brick_id-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx*ndy)>= 0 && (shifted_id-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx*ndy); - else if( coloring3D( brick_id-1-ndx*ndy, ndx, ndy )==color && (shifted_id-1-ndx*ndy)>= 0 && (shifted_id-1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-1-ndx*ndy); - else if( coloring3D( brick_id+1-ndx*ndy, ndx, ndy )==color && (shifted_id+1-ndx*ndy)>= 0 && (shifted_id+1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+1-ndx*ndy); - else if( coloring3D( brick_id-ndx-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx-ndx*ndy)>= 0 && (shifted_id-ndx-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-ndx*ndy); - else if( coloring3D( brick_id-ndx-1-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx-1-ndx*ndy)>= 0 && (shifted_id-ndx-1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1-ndx*ndy); - else if( coloring3D( brick_id-ndx+1-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx+1-ndx*ndy)>= 0 && (shifted_id-ndx+1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1-ndx*ndy); - else if( coloring3D( brick_id+ndx-ndx*ndy, ndx, ndy )==color && (shifted_id+ndx-ndx*ndy)>= 0 && (shifted_id+ndx-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-ndx*ndy); - else if( coloring3D( brick_id+ndx-1-ndx*ndy, ndx, ndy )==color && (shifted_id+ndx-1-ndx*ndy)>= 0 && (shifted_id+ndx-1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1-ndx*ndy); - else if( coloring3D( brick_id+ndx+1-ndx*ndy, ndx, ndy )==color && (shifted_id+ndx+1-ndx*ndy)>= 0 && (shifted_id+ndx+1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1-ndx*ndy); - - //Identification of the neighbours that are on the plane above - else if( coloring3D( brick_id+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx*ndy)>= 0 && (shifted_id+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx*ndy); - else if( coloring3D( 
brick_id-1+ndx*ndy, ndx, ndy )==color && (shifted_id-1+ndx*ndy)>= 0 && (shifted_id-1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-1+ndx*ndy); - else if( coloring3D( brick_id+1+ndx*ndy, ndx, ndy )==color && (shifted_id+1+ndx*ndy)>= 0 && (shifted_id+1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+1+ndx*ndy); - else if( coloring3D( brick_id-ndx+ndx*ndy, ndx, ndy )==color && (shifted_id-ndx+ndx*ndy)>= 0 && (shifted_id-ndx+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+ndx*ndy); - else if( coloring3D( brick_id-ndx-1+ndx*ndy, ndx, ndy )==color && (shifted_id-ndx-1+ndx*ndy)>= 0 && (shifted_id-ndx-1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1+ndx*ndy); - else if( coloring3D( brick_id-ndx+1+ndx*ndy, ndx, ndy )==color && (shifted_id-ndx+1+ndx*ndy)>= 0 && (shifted_id-ndx+1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1+ndx*ndy); - else if( coloring3D( brick_id+ndx+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx+ndx*ndy)>= 0 && (shifted_id+ndx+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+ndx*ndy); - else if( coloring3D( brick_id+ndx-1+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx-1+ndx*ndy)>= 0 && (shifted_id+ndx-1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1+ndx*ndy); - else if( coloring3D( brick_id+ndx+1+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx+1+ndx*ndy)>= 0 && (shifted_id+ndx+1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1+ndx*ndy); - - //in case neighbour>=0, it means that the current MPI processor (=subdomain) has a neighbour with the color analyzed at the current for-loop iteration - //otherwise the current MPI processor sits on the boundary of the mesh and it has less than 26 neighbours. 
- if(neighbour>=0) - { - for (iterator_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) - { - const int i_local = *it; - const int aux = BAP->getRowMap()->getLocalElement (i_local); - - std::vector BAP_inds; - std::vector BAP_vals; - - int aux2; - - aux2 = BAP->getColMap()->getLocalElement (neighbour); - - if(aux2>=0) - { - BAP_inds.emplace_back(aux2); - BAP_vals.emplace_back(localBAP[aux]); - BAP->insertLocalValues(aux, BAP_inds, BAP_vals); - } - else - std::cout<<"ID: "<getRank(); + int brick_id = mypid; + int shifted_id = brick_id - 1; + + if (mypid > 0) { + for (int color = 0; color < 27; ++color) { + int neighbour = -1; + Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); + + // The following if statements control the neighbours of a subdomain in a 3D brick partitioned mesh + // Each subdomains is incorporated in a 3x3x3 cube which is sliced into 3 squares living on three different planes + // The neighbours of a subdomain are checked plane by plane: in total there are three planes to span + // + // Identification of neighbours that are on the current plane + if (coloring3D(brick_id, ndx, ndy) == color && (shifted_id) >= 0 && (shifted_id) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id); + else if (coloring3D(brick_id - 1, ndx, ndy) == color && (shifted_id - 1) >= 0 && (shifted_id - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1); + else if (coloring3D(brick_id + 1, ndx, ndy) == color && (shifted_id + 1) >= 0 && (shifted_id + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1); + else if (coloring3D(brick_id - ndx, ndx, ndy) == color && (shifted_id - ndx) >= 0 && (shifted_id - ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx); + else if (coloring3D(brick_id - ndx - 1, ndx, ndy) == color && (shifted_id - ndx - 1) >= 0 && (shifted_id - ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1); + else if (coloring3D(brick_id - 
ndx + 1, ndx, ndy) == color && (shifted_id - ndx + 1) >= 0 && (shifted_id - ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1); + else if (coloring3D(brick_id + ndx, ndx, ndy) == color && (shifted_id + ndx) >= 0 && (shifted_id + ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx); + else if (coloring3D(brick_id + ndx - 1, ndx, ndy) == color && (shifted_id + ndx - 1) >= 0 && (shifted_id + ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1); + else if (coloring3D(brick_id + ndx + 1, ndx, ndy) == color && (shifted_id + ndx + 1) >= 0 && (shifted_id + ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1); + + // Identification of the neighbours that are on the plane below + else if (coloring3D(brick_id - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx * ndy) >= 0 && (shifted_id - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx * ndy); + else if (coloring3D(brick_id - 1 - ndx * ndy, ndx, ndy) == color && (shifted_id - 1 - ndx * ndy) >= 0 && (shifted_id - 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1 - ndx * ndy); + else if (coloring3D(brick_id + 1 - ndx * ndy, ndx, ndy) == color && (shifted_id + 1 - ndx * ndy) >= 0 && (shifted_id + 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1 - ndx * ndy); + else if (coloring3D(brick_id - ndx - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx - ndx * ndy) >= 0 && (shifted_id - ndx - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - ndx * ndy); + else if (coloring3D(brick_id - ndx - 1 - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx - 1 - ndx * ndy) >= 0 && (shifted_id - ndx - 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1 - ndx * ndy); + else if (coloring3D(brick_id - ndx + 1 - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx + 1 
- ndx * ndy) >= 0 && (shifted_id - ndx + 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1 - ndx * ndy); + else if (coloring3D(brick_id + ndx - ndx * ndy, ndx, ndy) == color && (shifted_id + ndx - ndx * ndy) >= 0 && (shifted_id + ndx - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - ndx * ndy); + else if (coloring3D(brick_id + ndx - 1 - ndx * ndy, ndx, ndy) == color && (shifted_id + ndx - 1 - ndx * ndy) >= 0 && (shifted_id + ndx - 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1 - ndx * ndy); + else if (coloring3D(brick_id + ndx + 1 - ndx * ndy, ndx, ndy) == color && (shifted_id + ndx + 1 - ndx * ndy) >= 0 && (shifted_id + ndx + 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1 - ndx * ndy); + + // Identification of the neighbours that are on the plane above + else if (coloring3D(brick_id + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx * ndy) >= 0 && (shifted_id + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx * ndy); + else if (coloring3D(brick_id - 1 + ndx * ndy, ndx, ndy) == color && (shifted_id - 1 + ndx * ndy) >= 0 && (shifted_id - 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1 + ndx * ndy); + else if (coloring3D(brick_id + 1 + ndx * ndy, ndx, ndy) == color && (shifted_id + 1 + ndx * ndy) >= 0 && (shifted_id + 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1 + ndx * ndy); + else if (coloring3D(brick_id - ndx + ndx * ndy, ndx, ndy) == color && (shifted_id - ndx + ndx * ndy) >= 0 && (shifted_id - ndx + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + ndx * ndy); + else if (coloring3D(brick_id - ndx - 1 + ndx * ndy, ndx, ndy) == color && (shifted_id - ndx - 1 + ndx * ndy) >= 0 && (shifted_id - ndx - 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = 
(shifted_id - ndx - 1 + ndx * ndy); + else if (coloring3D(brick_id - ndx + 1 + ndx * ndy, ndx, ndy) == color && (shifted_id - ndx + 1 + ndx * ndy) >= 0 && (shifted_id - ndx + 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1 + ndx * ndy); + else if (coloring3D(brick_id + ndx + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx + ndx * ndy) >= 0 && (shifted_id + ndx + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + ndx * ndy); + else if (coloring3D(brick_id + ndx - 1 + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx - 1 + ndx * ndy) >= 0 && (shifted_id + ndx - 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1 + ndx * ndy); + else if (coloring3D(brick_id + ndx + 1 + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx + 1 + ndx * ndy) >= 0 && (shifted_id + ndx + 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1 + ndx * ndy); + + // in case neighbour>=0, it means that the current MPI processor (=subdomain) has a neighbour with the color analyzed at the current for-loop iteration + // otherwise the current MPI processor sits on the boundary of the mesh and it has less than 26 neighbours. 
+ if (neighbour >= 0) { + for (iterator_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) { + const int i_local = *it; + const int aux = BAP->getRowMap()->getLocalElement(i_local); + + std::vector BAP_inds; + std::vector BAP_vals; + + int aux2; + + aux2 = BAP->getColMap()->getLocalElement(neighbour); + + if (aux2 >= 0) { + BAP_inds.emplace_back(aux2); + BAP_vals.emplace_back(localBAP[aux]); + BAP->insertLocalValues(aux, BAP_inds, BAP_vals); + } else + std::cout << "ID: " << mypid << " does not reach " << neighbour + 1 << std::endl; + } + } + } + } } - - diff --git a/packages/muelu/research/max/AdditiveMG/CreateADRMatrix.hpp b/packages/muelu/research/max/AdditiveMG/CreateADRMatrix.hpp index e936baf87926..f233e57f4cc9 100644 --- a/packages/muelu/research/max/AdditiveMG/CreateADRMatrix.hpp +++ b/packages/muelu/research/max/AdditiveMG/CreateADRMatrix.hpp @@ -54,439 +54,462 @@ namespace ADR { - namespace Xpetra { +namespace Xpetra { - // ============================================= ADR1D ============================================= - template - class ADR1DProblem : public Problem { - public: - ADR1DProblem(Teuchos::ParameterList& list, const Teuchos::RCP& map) : Problem(list, map) { } - Teuchos::RCP BuildMatrix(); +// ============================================= ADR1D ============================================= +template +class ADR1DProblem : public Problem { + public: + ADR1DProblem(Teuchos::ParameterList& list, const Teuchos::RCP& map) + : Problem(list, map) {} + Teuchos::RCP BuildMatrix(); - private: + private: + // domain definition + Scalar xleft = 0.0; + Scalar xright = 1.0; - //domain definition - Scalar xleft = 0.0; - Scalar xright = 1.0; + // Function that defines the diffusion coefficient + inline Scalar diff(Scalar x) { return 1.0 + x; }; - //Function that defines the diffusion coefficient - inline Scalar diff( Scalar x){return 1.0 + x;}; + // Function that defines the first derivative of the diffusion coefficient + inline Scalar 
diff_prime(Scalar x) { return 1.0; }; - //Function that defines the first derivative of the diffusion coefficient - inline Scalar diff_prime( Scalar x ){return 1.0;}; + // Function that defines the advection coefficient + inline Scalar adv(Scalar x) { return 10.0 * x; }; - //Function that defines the advection coefficient - inline Scalar adv( Scalar x){return 10.0 * x;}; + // Function taht defines the reaction coefficient + inline Scalar reac(Scalar x) { return 0.0; }; +}; - //Function taht defines the reaction coefficient - inline Scalar reac( Scalar x ){return 0.0;}; - }; +template +Teuchos::RCP ADR1DProblem::BuildMatrix() { + GlobalOrdinal nx = this->list_.get("nx", (GlobalOrdinal)-1); - template - Teuchos::RCP ADR1DProblem::BuildMatrix() { - GlobalOrdinal nx = this->list_.get("nx", (GlobalOrdinal) -1); + if (nx == -1) + nx = this->Map_->getGlobalNumElements(); - if (nx == -1) - nx = this->Map_->getGlobalNumElements(); + const Scalar dx = (Scalar)(xright - xleft) / static_cast(nx - 1); - const Scalar dx = (Scalar) (xright - xleft)/static_cast(nx - 1); + const Scalar a = 2.0; + const Scalar b = -1.0; + const Scalar c = -1.0; - const Scalar a = 2.0; - const Scalar b = -1.0; - const Scalar c = -1.0; + // this->A_ = Galeri::Xpetra::TriDiag(this->Map_, nx, 2.0, -1.0, -1.0); + Teuchos::RCP mtx = Galeri::Xpetra::MatrixTraits::Build(this->Map_, 3); - //this->A_ = Galeri::Xpetra::TriDiag(this->Map_, nx, 2.0, -1.0, -1.0); - Teuchos::RCP mtx = Galeri::Xpetra::MatrixTraits::Build(this->Map_, 3); + LocalOrdinal NumMyElements = this->Map_->getLocalNumElements(); + Teuchos::ArrayView MyGlobalElements = this->Map_->getLocalElementList(); + GlobalOrdinal indexBase = this->Map_->getIndexBase(); - LocalOrdinal NumMyElements = this->Map_->getLocalNumElements(); - Teuchos::ArrayView MyGlobalElements = this->Map_->getLocalElementList(); - GlobalOrdinal indexBase = this->Map_->getIndexBase(); + Teuchos::RCP > comm = this->Map_->getComm(); - Teuchos::RCP > comm = this->Map_->getComm(); 
+ GlobalOrdinal NumGlobalElements = this->Map_->getGlobalNumElements(); - GlobalOrdinal NumGlobalElements = this->Map_->getGlobalNumElements(); + GlobalOrdinal NumEntries; + LocalOrdinal nnz = 2; + std::vector Values(nnz); + std::vector Indices(nnz); - GlobalOrdinal NumEntries; - LocalOrdinal nnz = 2; - std::vector Values(nnz); - std::vector Indices(nnz); + comm->barrier(); - comm->barrier(); + Teuchos::RCP timer = Teuchos::rcp(new Teuchos::Time("1D Assembler global insert")); + timer->start(true); - Teuchos::RCP timer = Teuchos::rcp(new Teuchos::Time("1D Assembler global insert")); - timer->start(true); + // c a b + for (LocalOrdinal i = 0; i < NumMyElements; i++) { + if (MyGlobalElements[i] == indexBase) { + // off-diagonal for first row + Indices[0] = 1 + indexBase; - // c a b - for (LocalOrdinal i = 0; i < NumMyElements; i++) { - if (MyGlobalElements[i] == indexBase) { - // off-diagonal for first row - Indices[0] = 1 + indexBase; + // Diffusive term + Values[0] = b * diff(xleft + MyGlobalElements[i] * dx) / (dx * dx); + Values[0] = Values[0] - diff_prime(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); - //Diffusive term - Values [0] = b * diff( xleft + MyGlobalElements[i]*dx )/(dx * dx); - Values [0] = Values[0] - diff_prime( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); + // Advective term + Values[0] = Values[0] + adv(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); - //Advective term - Values [0] = Values[0] + adv( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); + NumEntries = 1; - NumEntries = 1; + } else if (MyGlobalElements[i] == NumGlobalElements + indexBase - 1) { + // off-diagonal for last row + Indices[0] = NumGlobalElements - 2 + indexBase; - } else if (MyGlobalElements[i] == NumGlobalElements + indexBase - 1) { - // off-diagonal for last row - Indices[0] = NumGlobalElements - 2 + indexBase; + // Diffusive term + Values[0] = c * diff(xleft + MyGlobalElements[i] * dx) / (dx * dx); + Values[0] = Values[0] + diff_prime(xleft + MyGlobalElements[i] * dx) / 
(2.0 * dx); - //Diffusive term - Values [0] = c * diff( xleft + MyGlobalElements[i]*dx )/(dx * dx); - Values [0] = Values[0] + diff_prime( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); + // Advective term + Values[0] = Values[0] - adv(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); - //Advective term - Values [0] = Values[0] - adv( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); + NumEntries = 1; - NumEntries = 1; + } else { + // off-diagonal for internal row + Indices[0] = MyGlobalElements[i] - 1; + Indices[1] = MyGlobalElements[i] + 1; - } else { - // off-diagonal for internal row - Indices[0] = MyGlobalElements[i] - 1; - Indices[1] = MyGlobalElements[i] + 1; + // Diffusive term + Values[0] = c * diff(xleft + MyGlobalElements[i] * dx) / (dx * dx); + Values[0] = Values[0] + diff_prime(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); + Values[1] = b * diff(xleft + MyGlobalElements[i] * dx) / (dx * dx); + Values[1] = Values[1] - diff_prime(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); - //Diffusive term - Values [0] = c * diff( xleft + MyGlobalElements[i]*dx )/(dx * dx); - Values [0] = Values[0] + diff_prime( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); - Values [1] = b * diff( xleft + MyGlobalElements[i]*dx )/(dx * dx); - Values [1] = Values[1] - diff_prime( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); - - //Advective term - Values [0] = Values[0] - adv( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); - Values [1] = Values[1] + adv( xleft + MyGlobalElements[i]*dx )/(2.0 * dx); + // Advective term + Values[0] = Values[0] - adv(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); + Values[1] = Values[1] + adv(xleft + MyGlobalElements[i] * dx) / (2.0 * dx); - NumEntries = 2; - } - - // put the off-diagonal entries - // Xpetra wants ArrayViews (sigh) - Teuchos::ArrayView av(&Values [0], NumEntries); - Teuchos::ArrayView iv(&Indices[0], NumEntries); - mtx->insertGlobalValues(MyGlobalElements[i], iv, av); - - // Put in the diagonal entry - //Diffusion - Scalar diag_entry = a * 
diff( xleft + MyGlobalElements[i]*dx )/(dx * dx); - //Reaction - diag_entry = diag_entry + reac( xleft + MyGlobalElements[i]*dx ); - - mtx->insertGlobalValues(MyGlobalElements[i], - Teuchos::tuple(MyGlobalElements[i]), - Teuchos::tuple( diag_entry )); - } - - timer->stop(); - - timer = Teuchos::rcp(new Teuchos::Time("1D Assembler fillComplete")); - timer->start(true); - - mtx->fillComplete(); - this->A_ = mtx; - - timer->stop(); - - this->A_->setObjectLabel(this->getObjectLabel()); - return this->A_; + NumEntries = 2; } + // put the off-diagonal entries + // Xpetra wants ArrayViews (sigh) + Teuchos::ArrayView av(&Values[0], NumEntries); + Teuchos::ArrayView iv(&Indices[0], NumEntries); + mtx->insertGlobalValues(MyGlobalElements[i], iv, av); + + // Put in the diagonal entry + // Diffusion + Scalar diag_entry = a * diff(xleft + MyGlobalElements[i] * dx) / (dx * dx); + // Reaction + diag_entry = diag_entry + reac(xleft + MyGlobalElements[i] * dx); + + mtx->insertGlobalValues(MyGlobalElements[i], + Teuchos::tuple(MyGlobalElements[i]), + Teuchos::tuple(diag_entry)); + } - // ============================================= ADR2D ============================================= - template - class ADR2DProblem : public Problem { - public: - ADR2DProblem(Teuchos::ParameterList& list, const Teuchos::RCP& map) : Problem(list, map) { } - Teuchos::RCP BuildMatrix(); - - private: - - //Function that defines the diffusion coefficient - inline Scalar diff( Scalar x, Scalar y ){return 1.0 + 0.0 * x + 0.0 * y;}; - - //Function that defines the first derivative in x-direction of the diffusion coefficient - inline Scalar diff_primex( Scalar x, Scalar y ){return 0.0 + 0.0 * x + 0.0 * y;}; + timer->stop(); + + timer = Teuchos::rcp(new Teuchos::Time("1D Assembler fillComplete")); + timer->start(true); - //Function that defines the first derivative in x-direction of the diffusion coefficient - inline Scalar diff_primey( Scalar x, Scalar y ){return 0.0 + 0.0 * x + 0.0 * y;}; - - //Function that 
defines the advection coefficient in the x-direction - inline Scalar advx( Scalar x, Scalar y ){return 10.0 + 0.0 * x + 0.0 * y;}; - - //Function that defines the advection coefficient in the x-direction - inline Scalar advy( Scalar x, Scalar y ){return 10.0 + 0.0 * x + 0.0 * y;}; - - //Function taht defines the reaction coefficient - inline Scalar reac( Scalar x, Scalar y ){return 0.0 + 0.0 * x + 0.0 * y;}; - - }; - - template - Teuchos::RCP ADR2DProblem::BuildMatrix() { - GlobalOrdinal nx = this->list_.get("nx", (GlobalOrdinal) -1); - GlobalOrdinal ny = this->list_.get("ny", (GlobalOrdinal) -1); - double one = 1.0; - Scalar stretchx = (Scalar) this->list_.get("stretchx", one); - Scalar stretchy = (Scalar) this->list_.get("stretchy", one); - - if (nx == -1 || ny == -1) { - GlobalOrdinal n = this->Map_->getGlobalNumElements(); - nx = (GlobalOrdinal)sqrt((double)n); - ny = nx; - TEUCHOS_TEST_FOR_EXCEPTION(nx*ny != n, std::logic_error, "You need to specify nx and ny."); + mtx->fillComplete(); + this->A_ = mtx; + + timer->stop(); + + this->A_->setObjectLabel(this->getObjectLabel()); + return this->A_; +} + +// ============================================= ADR2D ============================================= +template +class ADR2DProblem : public Problem { + public: + ADR2DProblem(Teuchos::ParameterList& list, const Teuchos::RCP& map) + : Problem(list, map) {} + Teuchos::RCP BuildMatrix(); + + private: + // Function that defines the diffusion coefficient + inline Scalar diff(Scalar x, Scalar y) { return 1.0 + 0.0 * x + 0.0 * y; }; + + // Function that defines the first derivative in x-direction of the diffusion coefficient + inline Scalar diff_primex(Scalar x, Scalar y) { return 0.0 + 0.0 * x + 0.0 * y; }; + + // Function that defines the first derivative in x-direction of the diffusion coefficient + inline Scalar diff_primey(Scalar x, Scalar y) { return 0.0 + 0.0 * x + 0.0 * y; }; + + // Function that defines the advection coefficient in the x-direction + inline Scalar 
advx(Scalar x, Scalar y) { return 10.0 + 0.0 * x + 0.0 * y; }; + + // Function that defines the advection coefficient in the x-direction + inline Scalar advy(Scalar x, Scalar y) { return 10.0 + 0.0 * x + 0.0 * y; }; + + // Function taht defines the reaction coefficient + inline Scalar reac(Scalar x, Scalar y) { return 0.0 + 0.0 * x + 0.0 * y; }; +}; + +template +Teuchos::RCP ADR2DProblem::BuildMatrix() { + GlobalOrdinal nx = this->list_.get("nx", (GlobalOrdinal)-1); + GlobalOrdinal ny = this->list_.get("ny", (GlobalOrdinal)-1); + double one = 1.0; + Scalar stretchx = (Scalar)this->list_.get("stretchx", one); + Scalar stretchy = (Scalar)this->list_.get("stretchy", one); + + if (nx == -1 || ny == -1) { + GlobalOrdinal n = this->Map_->getGlobalNumElements(); + nx = (GlobalOrdinal)sqrt((double)n); + ny = nx; + TEUCHOS_TEST_FOR_EXCEPTION(nx * ny != n, std::logic_error, "You need to specify nx and ny."); + } + bool keepBCs = this->list_.get("keepBCs", false); + + // Diffusion stencil + Scalar c1 = (Scalar)-one / (stretchx * stretchx); + Scalar b1 = (Scalar)-one / (stretchx * stretchx); + Scalar e1 = (Scalar)-one / (stretchy * stretchy); + Scalar d1 = (Scalar)-one / (stretchy * stretchy); + Scalar a1 = -(b1 + c1 + d1 + e1); + + // Advection stencil + Scalar c2 = (Scalar)one / (2.0 * stretchx); + Scalar b2 = (Scalar)one / (2.0 * stretchx); + Scalar e2 = (Scalar)one / (2.0 * stretchy); + Scalar d2 = (Scalar)one / (2.0 * stretchy); + + // this->A_ = Cross2D(this->Map_, nx, ny, center, west, east, south, north, this->DirichletBC_, keepBCs); + LocalOrdinal nnz = 5; + + Teuchos::RCP mtx = Galeri::Xpetra::MatrixTraits::Build(this->Map_, nnz); + + LocalOrdinal numMyElements = (this->Map_)->getLocalNumElements(); + GlobalOrdinal indexBase = (this->Map_)->getIndexBase(); + + Teuchos::ArrayView myGlobalElements = (this->Map_)->getLocalElementList(); + + GlobalOrdinal center, left, right, lower, upper; + std::vector vals(nnz); + std::vector inds(nnz); + + // e + // b a c + // d + for 
(LocalOrdinal i = 0; i < numMyElements; ++i) { + size_t n = 0; + + center = myGlobalElements[i] - indexBase; + + // Determine coordinates + Scalar x1 = (Scalar)(center % nx) * stretchx; + Scalar x2 = (Scalar)(std::floor(center / nx)) * stretchy; + + Galeri::Xpetra::GetNeighboursCartesian2d(center, nx, ny, left, right, lower, upper); + + bool isDirichlet = (left == -1 && (this->DirichletBC_ & DIR_LEFT)) || + (right == -1 && (this->DirichletBC_ & DIR_RIGHT)) || + (lower == -1 && (this->DirichletBC_ & DIR_BOTTOM)) || + (upper == -1 && (this->DirichletBC_ & DIR_TOP)); + + if (isDirichlet && keepBCs) { + // Dirichlet unknown we want to keep + inds[n] = center; + vals[n++] = Teuchos::ScalarTraits::one(); + + } else { + // The Neumann b.c. are treated in a sane way. The Dirichlet b.c., however, are treated + // insane when the option keepBCs=false. Speicifically, in this case we don't want to keep + // Dirichlet b.c., but that would result in inconsistency between the map and the number of + // degrees of freedom, plus the problem with GIDs. Therefore, we virtually expand domain by + // one node in the direction of the Dirichlet b.c., and then assume that that node was + // not kept. But we use an old GIDs. So yes, that's weird. 
+ + if (left != -1) { + inds[n] = left; + vals[n++] = b1 * diff(x1, x2) + b2 * diff_primex(x1, x2) - b2 * advx(x1, x2); + } + if (right != -1) { + inds[n] = right; + vals[n++] = c1 * diff(x1, x2) - c2 * diff_primex(x1, x2) + c2 * advx(x1, x2); + } + if (lower != -1) { + inds[n] = lower; + vals[n++] = d1 * diff(x1, x2) + d2 * diff_primey(x1, x2) - d2 * advy(x1, x2); + } + if (upper != -1) { + inds[n] = upper; + vals[n++] = e1 * diff(x1, x2) - e2 * diff_primey(x1, x2) + e2 * advy(x1, x2); } - bool keepBCs = this->list_.get("keepBCs", false); - - //Diffusion stencil - Scalar c1 = (Scalar) -one / (stretchx*stretchx); - Scalar b1 = (Scalar) -one / (stretchx*stretchx); - Scalar e1 = (Scalar) -one / (stretchy*stretchy); - Scalar d1 = (Scalar) -one / (stretchy*stretchy); - Scalar a1 = -( b1 + c1 + d1 + e1 ); - - //Advection stencil - Scalar c2 = (Scalar) one / (2.0 * stretchx); - Scalar b2 = (Scalar) one / (2.0 * stretchx); - Scalar e2 = (Scalar) one / (2.0 * stretchy); - Scalar d2 = (Scalar) one / (2.0 * stretchy); - - //this->A_ = Cross2D(this->Map_, nx, ny, center, west, east, south, north, this->DirichletBC_, keepBCs); - LocalOrdinal nnz = 5; - - Teuchos::RCP mtx = Galeri::Xpetra::MatrixTraits::Build(this->Map_, nnz); - - LocalOrdinal numMyElements = (this->Map_)->getLocalNumElements(); - GlobalOrdinal indexBase = (this->Map_)->getIndexBase(); - - Teuchos::ArrayView myGlobalElements = (this->Map_)->getLocalElementList(); - - GlobalOrdinal center, left, right, lower, upper; - std::vector vals(nnz); - std::vector inds(nnz); - - // e - // b a c - // d - for (LocalOrdinal i = 0; i < numMyElements; ++i) { - size_t n = 0; - - center = myGlobalElements[i] - indexBase; - - //Determine coordinates - Scalar x1 = (Scalar) (center % nx) * stretchx; - Scalar x2 = (Scalar) (std::floor( center/nx )) * stretchy; - - Galeri::Xpetra::GetNeighboursCartesian2d(center, nx, ny, left, right, lower, upper); - - bool isDirichlet = (left == -1 && (this->DirichletBC_ & DIR_LEFT)) || - (right == 
-1 && (this->DirichletBC_ & DIR_RIGHT)) || - (lower == -1 && (this->DirichletBC_ & DIR_BOTTOM)) || - (upper == -1 && (this->DirichletBC_ & DIR_TOP)); - - if (isDirichlet && keepBCs) { - // Dirichlet unknown we want to keep - inds[n] = center; - vals[n++] = Teuchos::ScalarTraits::one(); - - } else { - // The Neumann b.c. are treated in a sane way. The Dirichlet b.c., however, are treated - // insane when the option keepBCs=false. Speicifically, in this case we don't want to keep - // Dirichlet b.c., but that would result in inconsistency between the map and the number of - // degrees of freedom, plus the problem with GIDs. Therefore, we virtually expand domain by - // one node in the direction of the Dirichlet b.c., and then assume that that node was - // not kept. But we use an old GIDs. So yes, that's weird. - - - if (left != -1) { inds[n] = left; vals[n++] = b1 * diff(x1,x2) + b2 * diff_primex(x1,x2) - b2 * advx(x1,x2); } - if (right != -1) { inds[n] = right; vals[n++] = c1 * diff(x1,x2) - c2 * diff_primex(x1,x2) + c2 * advx(x1,x2); } - if (lower != -1) { inds[n] = lower; vals[n++] = d1 * diff(x1,x2) + d2 * diff_primey(x1,x2) - d2 * advy(x1,x2); } - if (upper != -1) { inds[n] = upper; vals[n++] = e1 * diff(x1,x2) - e2 * diff_primey(x1,x2) + e2 * advy(x1,x2); } - - // diagonal - Scalar z = a1 * diff(x1,x2) + reac(x1,x2); - if (Galeri::Xpetra::IsBoundary2d(center, nx, ny) && !isDirichlet) { - // Neumann boundary unknown (diagonal = sum of all offdiagonal) - z = Teuchos::ScalarTraits::zero(); - for (size_t j = 0; j < n; j++) - z -= vals[j]; - } - inds[n] = center; - vals[n++] = z; - } + // diagonal + Scalar z = a1 * diff(x1, x2) + reac(x1, x2); + if (Galeri::Xpetra::IsBoundary2d(center, nx, ny) && !isDirichlet) { + // Neumann boundary unknown (diagonal = sum of all offdiagonal) + z = Teuchos::ScalarTraits::zero(); for (size_t j = 0; j < n; j++) - inds[j] += indexBase; - - Teuchos::ArrayView iv(&inds[0], n); - Teuchos::ArrayView av(&vals[0], n); - 
mtx->insertGlobalValues(myGlobalElements[i], iv, av); + z -= vals[j]; } - - mtx->fillComplete(); - this->A_ = mtx; - - this->A_->setObjectLabel(this->getObjectLabel()); - return this->A_; + inds[n] = center; + vals[n++] = z; } - // ============================================= ADR3D ============================================= - template - class ADR3DProblem : public Problem { - public: - ADR3DProblem(Teuchos::ParameterList& list, const Teuchos::RCP& map) : Problem(list, map) { } - Teuchos::RCP BuildMatrix(); - - private: - - //Function that defines the diffusion coefficient - inline Scalar diff( Scalar x, Scalar y, Scalar z ){return 1.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function that defines the first derivative in x-direction of the diffusion coefficient - inline Scalar diff_primex( Scalar x, Scalar y, Scalar z ){return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function that defines the first derivative in x-direction of the diffusion coefficient - inline Scalar diff_primey( Scalar x, Scalar y, Scalar z ){return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function that defines the first derivative in x-direction of the diffusion coefficient - inline Scalar diff_primez( Scalar x, Scalar y, Scalar z ){return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function that defines the advection coefficient in the x-direction - inline Scalar advx( Scalar x, Scalar y, Scalar z ){return 10.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function that defines the advection coefficient in the x-direction - inline Scalar advy( Scalar x, Scalar y, Scalar z ){return 10.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function that defines the advection coefficient in the x-direction - inline Scalar advz( Scalar x, Scalar y, Scalar z ){return 10.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - //Function taht defines the reaction coefficient - inline Scalar reac( Scalar x, Scalar y, Scalar z ){return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z;}; - - }; - - template - Teuchos::RCP ADR3DProblem::BuildMatrix() { 
- GlobalOrdinal nx = this->list_.get("nx", (GlobalOrdinal) -1); - GlobalOrdinal ny = this->list_.get("ny", (GlobalOrdinal) -1); - GlobalOrdinal nz = this->list_.get("nz", (GlobalOrdinal) -1); - double one = 1.0; - Scalar stretchx = (Scalar) this->list_.get("stretchx", one); - Scalar stretchy = (Scalar) this->list_.get("stretchy", one); - Scalar stretchz = (Scalar) this->list_.get("stretchz", one); - - if (nx == -1 || ny == -1 || nz == -1) { - GlobalOrdinal n = this->Map_->getGlobalNumElements(); - nx = (GlobalOrdinal) Teuchos::ScalarTraits::pow(n, 0.33334); - ny = nx; nz = nx; - TEUCHOS_TEST_FOR_EXCEPTION(nx * ny * nz != n, std::logic_error, "You need to specify nx, ny, and nz"); + for (size_t j = 0; j < n; j++) + inds[j] += indexBase; + + Teuchos::ArrayView iv(&inds[0], n); + Teuchos::ArrayView av(&vals[0], n); + mtx->insertGlobalValues(myGlobalElements[i], iv, av); + } + + mtx->fillComplete(); + this->A_ = mtx; + + this->A_->setObjectLabel(this->getObjectLabel()); + return this->A_; +} + +// ============================================= ADR3D ============================================= +template +class ADR3DProblem : public Problem { + public: + ADR3DProblem(Teuchos::ParameterList& list, const Teuchos::RCP& map) + : Problem(list, map) {} + Teuchos::RCP BuildMatrix(); + + private: + // Function that defines the diffusion coefficient + inline Scalar diff(Scalar x, Scalar y, Scalar z) { return 1.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function that defines the first derivative in x-direction of the diffusion coefficient + inline Scalar diff_primex(Scalar x, Scalar y, Scalar z) { return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function that defines the first derivative in x-direction of the diffusion coefficient + inline Scalar diff_primey(Scalar x, Scalar y, Scalar z) { return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function that defines the first derivative in x-direction of the diffusion coefficient + inline Scalar diff_primez(Scalar x, Scalar y, 
Scalar z) { return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function that defines the advection coefficient in the x-direction + inline Scalar advx(Scalar x, Scalar y, Scalar z) { return 10.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function that defines the advection coefficient in the x-direction + inline Scalar advy(Scalar x, Scalar y, Scalar z) { return 10.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function that defines the advection coefficient in the x-direction + inline Scalar advz(Scalar x, Scalar y, Scalar z) { return 10.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; + + // Function taht defines the reaction coefficient + inline Scalar reac(Scalar x, Scalar y, Scalar z) { return 0.0 + 0.0 * x + 0.0 * y + 0.0 * z; }; +}; + +template +Teuchos::RCP ADR3DProblem::BuildMatrix() { + GlobalOrdinal nx = this->list_.get("nx", (GlobalOrdinal)-1); + GlobalOrdinal ny = this->list_.get("ny", (GlobalOrdinal)-1); + GlobalOrdinal nz = this->list_.get("nz", (GlobalOrdinal)-1); + double one = 1.0; + Scalar stretchx = (Scalar)this->list_.get("stretchx", one); + Scalar stretchy = (Scalar)this->list_.get("stretchy", one); + Scalar stretchz = (Scalar)this->list_.get("stretchz", one); + + if (nx == -1 || ny == -1 || nz == -1) { + GlobalOrdinal n = this->Map_->getGlobalNumElements(); + nx = (GlobalOrdinal)Teuchos::ScalarTraits::pow(n, 0.33334); + ny = nx; + nz = nx; + TEUCHOS_TEST_FOR_EXCEPTION(nx * ny * nz != n, std::logic_error, "You need to specify nx, ny, and nz"); + } + bool keepBCs = this->list_.get("keepBCs", false); + + // Diffusion stencil + Scalar c1 = (Scalar)-one / (stretchx * stretchx); + Scalar b1 = (Scalar)-one / (stretchx * stretchx); + Scalar g1 = (Scalar)-one / (stretchy * stretchy); + Scalar f1 = (Scalar)-one / (stretchy * stretchy); + Scalar e1 = (Scalar)-one / (stretchz * stretchz); + Scalar d1 = (Scalar)-one / (stretchz * stretchz); + Scalar a1 = -(c1 + b1 + g1 + f1 + e1 + d1); + + // Advection stencil + Scalar c2 = (Scalar)one / (2.0 * stretchx); + Scalar b2 = 
(Scalar)one / (2.0 * stretchx); + Scalar g2 = (Scalar)one / (2.0 * stretchy); + Scalar f2 = (Scalar)one / (2.0 * stretchy); + Scalar e2 = (Scalar)one / (2.0 * stretchz); + Scalar d2 = (Scalar)one / (2.0 * stretchz); + + // this->A_ = Cross3D(this->Map_, nx, ny, nz, center, left, right, front, back, down, up, this->DirichletBC_, keepBCs); + LocalOrdinal nnz = 7; + + Teuchos::RCP mtx = Galeri::Xpetra::MatrixTraits::Build(this->Map_, nnz); + + LocalOrdinal numMyElements = this->Map_->getLocalNumElements(); + GlobalOrdinal indexBase = this->Map_->getIndexBase(); + + Teuchos::ArrayView myGlobalElements = (this->Map_)->getLocalElementList(); + + GlobalOrdinal center, left, right, bottom, top, front, back; + std::vector inds(nnz); + std::vector vals(nnz); + + // e + // b a c + // d + // + f bottom and g top + for (LocalOrdinal i = 0; i < numMyElements; ++i) { + size_t n = 0; + + center = myGlobalElements[i] - indexBase; + + // Determine coordinates + Scalar x3 = (Scalar)(std::floor(center / (nx * ny))) * stretchz; + int plane = center % (nx * ny); + Scalar x1 = (Scalar)(plane % nx) * stretchx; + Scalar x2 = (Scalar)(std::floor(plane / nx)) * stretchy; + + Galeri::Xpetra::GetNeighboursCartesian3d(center, nx, ny, nz, + left, right, front, back, bottom, top); + + bool isDirichlet = (left == -1 && (this->DirichletBC_ & DIR_LEFT)) || + (right == -1 && (this->DirichletBC_ & DIR_RIGHT)) || + (front == -1 && (this->DirichletBC_ & DIR_BOTTOM)) || + (back == -1 && (this->DirichletBC_ & DIR_TOP)) || + (front == -1 && (this->DirichletBC_ & DIR_FRONT)) || + (back == -1 && (this->DirichletBC_ & DIR_BACK)); + + if (isDirichlet && keepBCs) { + // Dirichlet unknown we want to keep + inds[n] = center; + vals[n++] = Teuchos::ScalarTraits::one(); + + } else { + // See comments about weird in Cross2D + if (left != -1) { + inds[n] = left; + vals[n++] = b1 * diff(x1, x2, x3) + b2 * diff_primex(x1, x2, x3) - b2 * advx(x1, x2, x3); + } + if (right != -1) { + inds[n] = right; + vals[n++] = c1 * 
diff(x1, x2, x3) - c2 * diff_primex(x1, x2, x3) + c2 * advx(x1, x2, x3); + } + if (front != -1) { + inds[n] = front; + vals[n++] = d1 * diff(x1, x2, x3) + d2 * diff_primey(x1, x2, x3) - d2 * advy(x1, x2, x3); + } + if (back != -1) { + inds[n] = back; + vals[n++] = e1 * diff(x1, x2, x3) - e2 * diff_primey(x1, x2, x3) + e2 * advy(x1, x2, x3); + } + if (bottom != -1) { + inds[n] = bottom; + vals[n++] = f1 * diff(x1, x2, x3) + f2 * diff_primez(x1, x2, x3) - f2 * advz(x1, x2, x3); + } + if (top != -1) { + inds[n] = top; + vals[n++] = g1 * diff(x1, x2, x3) - g2 * diff_primez(x1, x2, x3) + g2 * advz(x1, x2, x3); } - bool keepBCs = this->list_.get("keepBCs", false); - - //Diffusion stencil - Scalar c1 = (Scalar) -one / (stretchx*stretchx); - Scalar b1 = (Scalar) -one / (stretchx*stretchx); - Scalar g1 = (Scalar) -one / (stretchy*stretchy); - Scalar f1 = (Scalar) -one / (stretchy*stretchy); - Scalar e1 = (Scalar) -one / (stretchz*stretchz); - Scalar d1 = (Scalar) -one / (stretchz*stretchz); - Scalar a1 = -(c1 + b1 + g1 + f1 + e1 + d1); - - //Advection stencil - Scalar c2 = (Scalar) one / (2.0 * stretchx); - Scalar b2 = (Scalar) one / (2.0 * stretchx); - Scalar g2 = (Scalar) one / (2.0 * stretchy); - Scalar f2 = (Scalar) one / (2.0 * stretchy); - Scalar e2 = (Scalar) one / (2.0 * stretchz); - Scalar d2 = (Scalar) one / (2.0 * stretchz); - - //this->A_ = Cross3D(this->Map_, nx, ny, nz, center, left, right, front, back, down, up, this->DirichletBC_, keepBCs); - LocalOrdinal nnz = 7; - - Teuchos::RCP mtx = Galeri::Xpetra::MatrixTraits::Build(this->Map_, nnz); - - LocalOrdinal numMyElements = this->Map_->getLocalNumElements(); - GlobalOrdinal indexBase = this->Map_->getIndexBase(); - - Teuchos::ArrayView myGlobalElements = (this->Map_)->getLocalElementList(); - - GlobalOrdinal center, left, right, bottom, top, front, back; - std::vector inds(nnz); - std::vector vals(nnz); - - // e - // b a c - // d - // + f bottom and g top - for (LocalOrdinal i = 0; i < numMyElements; ++i) { - 
size_t n = 0; - - center = myGlobalElements[i] - indexBase; - - //Determine coordinates - Scalar x3 = (Scalar) (std::floor( center/( nx*ny ) )) * stretchz; - int plane = center % (nx*ny); - Scalar x1 = (Scalar) (plane % nx) * stretchx; - Scalar x2 = (Scalar) (std::floor( plane/nx )) * stretchy; - - - Galeri::Xpetra::GetNeighboursCartesian3d(center, nx, ny, nz, - left, right, front, back, bottom, top); - - bool isDirichlet = (left == -1 && (this->DirichletBC_ & DIR_LEFT)) || - (right == -1 && (this->DirichletBC_ & DIR_RIGHT)) || - (front == -1 && (this->DirichletBC_ & DIR_BOTTOM)) || - (back == -1 && (this->DirichletBC_ & DIR_TOP)) || - (front == -1 && (this->DirichletBC_ & DIR_FRONT)) || - (back == -1 && (this->DirichletBC_ & DIR_BACK)); - - if (isDirichlet && keepBCs) { - // Dirichlet unknown we want to keep - inds[n] = center; - vals[n++] = Teuchos::ScalarTraits::one(); - - } else { - // See comments about weird in Cross2D - if (left != -1) { inds[n] = left; vals[n++] = b1 * diff(x1,x2,x3) + b2 * diff_primex(x1,x2,x3) - b2 * advx(x1,x2,x3); } - if (right != -1) { inds[n] = right; vals[n++] = c1 * diff(x1,x2,x3) - c2 * diff_primex(x1,x2,x3) + c2 * advx(x1,x2,x3); } - if (front != -1) { inds[n] = front; vals[n++] = d1 * diff(x1,x2,x3) + d2 * diff_primey(x1,x2,x3) - d2 * advy(x1,x2,x3); } - if (back != -1) { inds[n] = back; vals[n++] = e1 * diff(x1,x2,x3) - e2 * diff_primey(x1,x2,x3) + e2 * advy(x1,x2,x3); } - if (bottom != -1) { inds[n] = bottom; vals[n++] = f1 * diff(x1,x2,x3) + f2 * diff_primez(x1,x2,x3) - f2 * advz(x1,x2,x3); } - if (top != -1) { inds[n] = top; vals[n++] = g1 * diff(x1,x2,x3) - g2 * diff_primez(x1,x2,x3) + g2 * advz(x1,x2,x3); } - - // diagonal - Scalar z = a1 * diff(x1,x2,x3) + reac(x1,x2,x3); - if (Galeri::Xpetra::IsBoundary3d(center, nx, ny, nz) && !isDirichlet) { - // Neumann boundary unknown (diagonal = sum of all offdiagonal) - z = Teuchos::ScalarTraits::zero(); - for (size_t j = 0; j < n; j++) - z -= vals[j]; - } - inds[n] = center; - 
vals[n++] = z; - } + // diagonal + Scalar z = a1 * diff(x1, x2, x3) + reac(x1, x2, x3); + if (Galeri::Xpetra::IsBoundary3d(center, nx, ny, nz) && !isDirichlet) { + // Neumann boundary unknown (diagonal = sum of all offdiagonal) + z = Teuchos::ScalarTraits::zero(); for (size_t j = 0; j < n; j++) - inds[j] += indexBase; - - Teuchos::ArrayView iv(&inds[0], n); - Teuchos::ArrayView av(&vals[0], n); - mtx->insertGlobalValues(myGlobalElements[i], iv, av); + z -= vals[j]; } - - mtx->fillComplete(); - this->A_ = mtx; - - this->A_->setObjectLabel(this->getObjectLabel()); - return this->A_; + inds[n] = center; + vals[n++] = z; } + for (size_t j = 0; j < n; j++) + inds[j] += indexBase; + + Teuchos::ArrayView iv(&inds[0], n); + Teuchos::ArrayView av(&vals[0], n); + mtx->insertGlobalValues(myGlobalElements[i], iv, av); + } + mtx->fillComplete(); + this->A_ = mtx; + this->A_->setObjectLabel(this->getObjectLabel()); + return this->A_; +} - } // namespace Xpetra +} // namespace Xpetra -} // namespace ADR +} // namespace ADR -#endif // CREATEADRMATRIX_HPP +#endif // CREATEADRMATRIX_HPP diff --git a/packages/muelu/research/max/AdditiveMG/CreateBrickMap.hpp b/packages/muelu/research/max/AdditiveMG/CreateBrickMap.hpp index 96ec3ba27c69..4a544e827ff8 100644 --- a/packages/muelu/research/max/AdditiveMG/CreateBrickMap.hpp +++ b/packages/muelu/research/max/AdditiveMG/CreateBrickMap.hpp @@ -1,103 +1,89 @@ -void createBrickMap1D( int numGlobalElements, std::vector& ind, Teuchos::RCP< const Teuchos::Comm > comm) -{ - //INPUT: numGlobalElements = number of finite difference nodes along x - //INPUT: ind = vector containing indices of the rows owned by the current MPI processor - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - - int mypid = comm->getRank(); - - ind.reserve (static_cast( numGlobalElements/( comm->getSize()-1 ) + 1 )); - if( mypid !=0 && mypid!=comm->getSize()-1 ) - for(int i = 0; i <= ( static_cast(numGlobalElements/( comm->getSize()-1 ))) - 1; ++i) - ind.emplace_back( 
(mypid-1) * static_cast(numGlobalElements/( comm->getSize()-1 )) + i ); - - if( mypid==comm->getSize()-1 ) - for(int i = (mypid-1) * static_cast(numGlobalElements/( comm->getSize()-1 )); i != numGlobalElements; ++i)\ - ind.emplace_back( i ); -} - - - +void createBrickMap1D(int numGlobalElements, std::vector& ind, Teuchos::RCP > comm) { + // INPUT: numGlobalElements = number of finite difference nodes along x + // INPUT: ind = vector containing indices of the rows owned by the current MPI processor + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) -void createBrickMap2D( int nx, int brick_sizex, int brick_sizey, std::vector& ind, Teuchos::RCP< const Teuchos::Comm > comm) -{ + int mypid = comm->getRank(); - //INPUT: nx = number of finite difference nodes along x - //INPUT: brick_sizex = size of a brick aggregate along x-direction - //INPUT: brick_sizey = size of a brick aggregate along y-direction - //INPUT: ind = vector containing indices of the rows owned by the current MPI processor - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - - int mypid = comm->getRank(); + ind.reserve(static_cast(numGlobalElements / (comm->getSize() - 1) + 1)); + if (mypid != 0 && mypid != comm->getSize() - 1) + for (int i = 0; i <= (static_cast(numGlobalElements / (comm->getSize() - 1))) - 1; ++i) + ind.emplace_back((mypid - 1) * static_cast(numGlobalElements / (comm->getSize() - 1)) + i); - ind.reserve(brick_sizex * brick_sizey); - - int ndx = nx /brick_sizex; + if (mypid == comm->getSize() - 1) + for (int i = (mypid - 1) * static_cast(numGlobalElements / (comm->getSize() - 1)); i != numGlobalElements; ++i) + ind.emplace_back(i); +} - if( mypid !=0 ) - { - int grid_row = std::ceil(static_cast(mypid)/ndx); - int ypos = grid_row; - int xpos; +void createBrickMap2D(int nx, int brick_sizex, int brick_sizey, std::vector& ind, Teuchos::RCP > comm) { + // INPUT: nx = number of finite difference nodes along x + // INPUT: brick_sizex = size of a brick aggregate along x-direction + // 
INPUT: brick_sizey = size of a brick aggregate along y-direction + // INPUT: ind = vector containing indices of the rows owned by the current MPI processor + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) - if( 0 != mypid%ndx ) - xpos = mypid%ndx; - else - xpos = ndx; + int mypid = comm->getRank(); - int preliminary = nx * brick_sizey * (ypos - 1); + ind.reserve(brick_sizex * brick_sizey); - for( int row=0; row(mypid) / ndx); + int ypos = grid_row; + int xpos; + if (0 != mypid % ndx) + xpos = mypid % ndx; + else + xpos = ndx; + int preliminary = nx * brick_sizey * (ypos - 1); -void createBrickMap3D( int nx, int ny, int brick_sizex, int brick_sizey, int brick_sizez, std::vector& ind, Teuchos::RCP< const Teuchos::Comm > comm) -{ + for (int row = 0; row < brick_sizey; ++row) + for (int col = brick_sizex * (xpos - 1) + 1; col <= brick_sizex * xpos; ++col) + ind.emplace_back(preliminary + row * nx + col - 1); + } +} - //INPUT: nx = number of finite difference nodes along x - //INPUT: ny = number of finite difference nodes along y - //INPUT: brick_sizex = size of a brick aggregate along x-direction - //INPUT: brick_sizey = size of a brick aggregate along y-direction - //INPUT: ind = vector containing indices of the rows owned by the current MPI processor - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) +void createBrickMap3D(int nx, int ny, int brick_sizex, int brick_sizey, int brick_sizez, std::vector& ind, Teuchos::RCP > comm) { + // INPUT: nx = number of finite difference nodes along x + // INPUT: ny = number of finite difference nodes along y + // INPUT: brick_sizex = size of a brick aggregate along x-direction + // INPUT: brick_sizey = size of a brick aggregate along y-direction + // INPUT: ind = vector containing indices of the rows owned by the current MPI processor + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) - int mypid = comm->getRank(); + int mypid = comm->getRank(); - ind.reserve(brick_sizex * brick_sizey * brick_sizez); + 
ind.reserve(brick_sizex * brick_sizey * brick_sizez); - // determine the number of subdomains along x and y directions - int ndx = nx /brick_sizex; - int ndy = ny /brick_sizey; + // determine the number of subdomains along x and y directions + int ndx = nx / brick_sizex; + int ndy = ny / brick_sizey; - if( mypid !=0 ) - { - int grid_plane = std::ceil( static_cast(mypid)/(ndx*ndy) ); - int plane_id = mypid % ( ndx*ndy ); + if (mypid != 0) { + int grid_plane = std::ceil(static_cast(mypid) / (ndx * ndy)); + int plane_id = mypid % (ndx * ndy); - if(0 == plane_id) - plane_id = ndx*ndy; + if (0 == plane_id) + plane_id = ndx * ndy; - int plane_row = std::ceil(static_cast(plane_id)/ndx); - int ypos = plane_row; - int xpos; + int plane_row = std::ceil(static_cast(plane_id) / ndx); + int ypos = plane_row; + int xpos; - if( 0 != plane_id%ndx ) - xpos = plane_id%ndx; - else - xpos = ndx; + if (0 != plane_id % ndx) + xpos = plane_id % ndx; + else + xpos = ndx; - int preliminary = nx * ny * brick_sizez * (grid_plane - 1) + nx * brick_sizey * (ypos - 1); + int preliminary = nx * ny * brick_sizez * (grid_plane - 1) + nx * brick_sizey * (ypos - 1); - for(int l = 0; l #include - // Belos provides Krylov solvers #include #include @@ -76,7 +75,7 @@ #include #include -//ADR subdirectory +// ADR subdirectory #include "CreateADRMatrix.hpp" #include #include @@ -87,24 +86,23 @@ #include "BAP.hpp" #include "CreateBrickMap.hpp" -int main(int argc, char *argv[]) { - +int main(int argc, char* argv[]) { // Define default types - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; // Convenient typedef's - typedef Tpetra::Operator operator_type; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::RowMatrix row_matrix_type; - typedef Tpetra::Vector vector_type; - typedef 
Tpetra::MultiVector multivector_type; - typedef Tpetra::Map driver_map_type; + typedef Tpetra::Operator operator_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::Vector vector_type; + typedef Tpetra::MultiVector multivector_type; + typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; - typedef MueLu::Utilities MueLuUtilities; + typedef MueLu::Utilities MueLuUtilities; typedef Belos::LinearProblem linear_problem_type; typedef Belos::SolverManager belos_solver_manager_type; @@ -112,50 +110,50 @@ int main(int argc, char *argv[]) { typedef Belos::BlockGmresSolMgr belos_gmres_manager_type; typedef Belos::BiCGStabSolMgr belos_bicgstab_manager_type; - typedef Ifpack2::Preconditioner precond_type; + typedef Ifpack2::Preconditioner precond_type; - //MueLu_UseShortNames.hpp wants these typedefs. - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; + // MueLu_UseShortNames.hpp wants these typedefs. 
+ typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; typedef global_ordinal_type GlobalOrdinal; - typedef node_type Node; -# include + typedef node_type Node; +#include - typedef Galeri::Xpetra::Problem GaleriXpetraProblem; - typedef ADR::Xpetra::Problem ADRXpetraProblem; + typedef Galeri::Xpetra::Problem GaleriXpetraProblem; + typedef ADR::Xpetra::Problem ADRXpetraProblem; - using Teuchos::RCP; // reference count pointers - using Teuchos::rcp; // reference count pointers + using Teuchos::RCP; // reference count pointers + using Teuchos::rcp; // reference count pointers // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int mypid = comm->getRank(); -/* - int subCommRank[3]={0,1,2}; - Teuchos::ArrayView arraySubCommRank(subCommRank, 3); - auto subComm = comm->createSubcommunicator(arraySubCommRank); -*/ + RCP > comm = Teuchos::DefaultComm::getComm(); + int mypid = comm->getRank(); + /* + int subCommRank[3]={0,1,2}; + Teuchos::ArrayView arraySubCommRank(subCommRank, 3); + auto subComm = comm->createSubcommunicator(arraySubCommRank); + */ Teuchos::CommandLineProcessor clp(false); - global_ordinal_type maxIts = 10000; - scalar_type tol = 1e-10; - std::string solverOptionsFile = "final_parser.xml"; - std::string krylovSolverType = "bicgstab"; + global_ordinal_type maxIts = 10000; + scalar_type tol = 1e-10; + std::string solverOptionsFile = "final_parser.xml"; + std::string krylovSolverType = "bicgstab"; - clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); - clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - clp.setOption("tol", &tol, "tolerance for Krylov solver"); - clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); + clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); + clp.setOption("maxits", &maxIts, 
"maximum number of Krylov iterations"); + clp.setOption("tol", &tol, "tolerance for Krylov solver"); + clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Teuchos::ParameterList xmlParams; @@ -166,7 +164,7 @@ int main(int argc, char *argv[]) { problemParams = xmlParams.sublist(static_cast("Problem")); // Problem definition - std::string problem_type = problemParams.get(static_cast("problem type")); + std::string problem_type = problemParams.get(static_cast("problem type")); // Parameters @@ -183,14 +181,12 @@ int main(int argc, char *argv[]) { MueLu::DomainPartitioning domain; int keep_boundary = 0; - Scalar stretchx = (Scalar) Lx/nx; - Scalar stretchy = (Scalar) Ly/ny; - Scalar stretchz = (Scalar) Lz/nz; + Scalar stretchx = (Scalar)Lx / nx; + Scalar stretchy = (Scalar)Ly / ny; + Scalar stretchz = (Scalar)Lz / nz; - - ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary , stretchx, stretchy, stretchz); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - + ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary, stretchx, stretchy, stretchz); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra // // Construct the problem @@ -200,57 +196,55 @@ int main(int argc, char *argv[]) { RCP xpetraMap; std::vector ind; - //BRICK SIZE + // BRICK SIZE int brick_sizex = mueluParams.get(static_cast("aggregation: brick x size")); int 
brick_sizey = mueluParams.get(static_cast("aggregation: brick y size")); int brick_sizez = mueluParams.get(static_cast("aggregation: brick z size")); - //Creation of the map where processor 0 gets nothing at the fine level - if( comm->getSize()>1 ) - { - if(problem_type == "ADR1D") - createBrickMap1D(matrixParameters.GetNumGlobalElements(), ind, comm ); + // Creation of the map where processor 0 gets nothing at the fine level + if (comm->getSize() > 1) { + if (problem_type == "ADR1D") + createBrickMap1D(matrixParameters.GetNumGlobalElements(), ind, comm); - else if(problem_type == "ADR2D") - createBrickMap2D( nx, brick_sizex, brick_sizey, ind, comm ); + else if (problem_type == "ADR2D") + createBrickMap2D(nx, brick_sizex, brick_sizey, ind, comm); - else if(problem_type == "ADR3D") - createBrickMap3D( nx, ny, brick_sizex, brick_sizey, brick_sizez, ind, comm ); + else if (problem_type == "ADR3D") + createBrickMap3D(nx, ny, brick_sizex, brick_sizey, brick_sizez, ind, comm); - ind.shrink_to_fit(); + ind.shrink_to_fit(); - Teuchos::ArrayView elementList (ind); - xpetraMap = MapFactory::Build(Xpetra::UseTpetra,matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); - } - else if( comm->getSize()==1 ) - xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + Teuchos::ArrayView elementList(ind); + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); + } else if (comm->getSize() == 1) + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); RCP coordinates; if (problem_type == "ADR1D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR2D") - coordinates = 
Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR3D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); - RCP xpetraA = Pr->BuildMatrix(); + RCP xpetraA = Pr->BuildMatrix(); - RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); + RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); - // =================================================== - // Domain Decomposition Preconditioner - // =================================== + // =================================================== + // Domain Decomposition Preconditioner + // =================================== - //Creation of the MueLu list for the DD preconditioner + // Creation of the MueLu list for the DD preconditioner RCP dd_list = rcp(new Teuchos::ParameterList()); dd_list->setName("MueLu"); dd_list->set("verbosity", "low"); dd_list->set("number of equations", 1); dd_list->set("max levels", 1); - dd_list->set("coarse: type", "SCHWARZ"); //FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL + dd_list->set("coarse: type", "SCHWARZ"); // FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL Teuchos::ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); dd_smooth_sublist.set("schwarz: overlap level", 0); @@ -263,30 +257,30 @@ int main(int argc, char *argv[]) { coarse_subdomain_solver.set("fact: relative threshold", 1.); coarse_subdomain_solver.set("fact: relax value", 0.); - RCP 
B_DD = MueLu::CreateTpetraPreconditioner( (RCP)A, *dd_list ); + RCP B_DD = MueLu::CreateTpetraPreconditioner((RCP)A, *dd_list); + + // =================================================== + // Multi Grid Preconditioner + // =================================== - // =================================================== - // Multi Grid Preconditioner - // =================================== - RCP M; - M = MueLu::CreateTpetraPreconditioner( (RCP)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates) ); + M = MueLu::CreateTpetraPreconditioner((RCP)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates)); - RCP X_muelu = rcp(new multivector_type(map,1)); - RCP B = rcp(new multivector_type(map,1)); + RCP X_muelu = rcp(new multivector_type(map, 1)); + RCP B = rcp(new multivector_type(map, 1)); RCP Problem_muelu; - X_muelu->putScalar((scalar_type) 0.0); + X_muelu->putScalar((scalar_type)0.0); B->randomize(); Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); RCP belosList = rcp(new Teuchos::ParameterList()); - belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - //belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList->set("Verbosity", Belos::Errors); - belosList->set("Output Frequency", 1); - belosList->set("Output Style", Belos::Brief); + belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList->set("Verbosity", Belos::Errors); + belosList->set("Output Frequency", 1); + belosList->set("Output Style", Belos::Brief); belosList->set("Implicit Residual Scaling", "None"); RCP solver; if (krylovSolverType == "cg") @@ -298,38 +292,35 @@ int main(int argc, char *argv[]) { 
else throw std::invalid_argument("bad Krylov solver type"); -for(int trial = 1; trial<=number_runs; ++trial) -{ - - X_muelu->putScalar((scalar_type) 0.0); - B->randomize(); - - // - // Set up Krylov solver and iterate. - // - - Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); - Problem_muelu->setRightPrec(M); - Problem_muelu->setProblem(); - - solver->setProblem(Problem_muelu); - solver->solve(); - int numIterations_muelu = solver->getNumIters(); - - Teuchos::Array::magnitudeType> normVec_muelu(1); - multivector_type residual_muelu(B->getMap(),1); - A->apply(*X_muelu, residual_muelu); - residual_muelu.update(1.0, *B, -1.0); - residual_muelu.norm2(normVec_muelu); - if (mypid == 0) { - std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; - std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; - } -} - + for (int trial = 1; trial <= number_runs; ++trial) { + X_muelu->putScalar((scalar_type)0.0); + B->randomize(); + + // + // Set up Krylov solver and iterate. 
+ // + + Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); + Problem_muelu->setRightPrec(M); + Problem_muelu->setProblem(); + + solver->setProblem(Problem_muelu); + solver->solve(); + int numIterations_muelu = solver->getNumIters(); + + Teuchos::Array::magnitudeType> normVec_muelu(1); + multivector_type residual_muelu(B->getMap(), 1); + A->apply(*X_muelu, residual_muelu); + residual_muelu.update(1.0, *B, -1.0); + residual_muelu.norm2(normVec_muelu); + if (mypid == 0) { + std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; + std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; + } + } - #include - Teuchos::TimeMonitor::summarize (); +#include + Teuchos::TimeMonitor::summarize(); return EXIT_SUCCESS; } diff --git a/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp b/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp index 141915443d21..3355a1d8bf37 100644 --- a/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp +++ b/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp @@ -60,7 +60,6 @@ #include - // Belos provides Krylov solvers #include #include @@ -76,7 +75,7 @@ #include #include -//ADR subdirectory +// ADR subdirectory #include "CreateADRMatrix.hpp" #include #include @@ -85,23 +84,22 @@ #include "Smooth_Prolongation.cpp" int main(int argc, char *argv[]) { - // Define default types - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; // Convenient typedef's - typedef Tpetra::Operator operator_type; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::RowMatrix row_matrix_type; - typedef Tpetra::Vector vector_type; - typedef Tpetra::MultiVector multivector_type; - typedef Tpetra::Map driver_map_type; + typedef 
Tpetra::Operator operator_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::Vector vector_type; + typedef Tpetra::MultiVector multivector_type; + typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; - typedef MueLu::Utilities MueLuUtilities; + typedef MueLu::Utilities MueLuUtilities; typedef Belos::LinearProblem linear_problem_type; typedef Belos::SolverManager belos_solver_manager_type; @@ -109,50 +107,50 @@ int main(int argc, char *argv[]) { typedef Belos::BlockGmresSolMgr belos_gmres_manager_type; typedef Belos::BiCGStabSolMgr belos_bicgstab_manager_type; - typedef Ifpack2::Preconditioner precond_type; + typedef Ifpack2::Preconditioner precond_type; - //MueLu_UseShortNames.hpp wants these typedefs. - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; + // MueLu_UseShortNames.hpp wants these typedefs. + typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; typedef global_ordinal_type GlobalOrdinal; - typedef node_type Node; -# include + typedef node_type Node; +#include - typedef Galeri::Xpetra::Problem GaleriXpetraProblem; - typedef ADR::Xpetra::Problem ADRXpetraProblem; + typedef Galeri::Xpetra::Problem GaleriXpetraProblem; + typedef ADR::Xpetra::Problem ADRXpetraProblem; - using Teuchos::RCP; // reference count pointers - using Teuchos::rcp; // reference count pointers + using Teuchos::RCP; // reference count pointers + using Teuchos::rcp; // reference count pointers // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int mypid = comm->getRank(); -/* - int subCommRank[3]={0,1,2}; - Teuchos::ArrayView arraySubCommRank(subCommRank, 3); - auto subComm = comm->createSubcommunicator(arraySubCommRank); -*/ + RCP > comm = Teuchos::DefaultComm::getComm(); + int mypid = comm->getRank(); + /* + int 
subCommRank[3]={0,1,2}; + Teuchos::ArrayView arraySubCommRank(subCommRank, 3); + auto subComm = comm->createSubcommunicator(arraySubCommRank); + */ Teuchos::CommandLineProcessor clp(false); - global_ordinal_type maxIts = 10000; - scalar_type tol = 1e-10; - std::string solverOptionsFile = "final_parser.xml"; - std::string krylovSolverType = "bicgstab"; + global_ordinal_type maxIts = 10000; + scalar_type tol = 1e-10; + std::string solverOptionsFile = "final_parser.xml"; + std::string krylovSolverType = "bicgstab"; - clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); - clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - clp.setOption("tol", &tol, "tolerance for Krylov solver"); - clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); + clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); + clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); + clp.setOption("tol", &tol, "tolerance for Krylov solver"); + clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Teuchos::ParameterList xmlParams; @@ -163,7 +161,7 @@ int main(int argc, char *argv[]) { problemParams = xmlParams.sublist(static_cast("Problem")); // Problem definition - std::string problem_type = problemParams.get(static_cast("problem type")); + std::string problem_type = problemParams.get(static_cast("problem type")); // Parameters @@ -180,14 +178,12 @@ int main(int argc, char *argv[]) { MueLu::DomainPartitioning 
domain; int keep_boundary = 0; - Scalar stretchx = (Scalar) Lx/nx; - Scalar stretchy = (Scalar) Ly/ny; - Scalar stretchz = (Scalar) Lz/nz; + Scalar stretchx = (Scalar)Lx / nx; + Scalar stretchy = (Scalar)Ly / ny; + Scalar stretchz = (Scalar)Lz / nz; - - ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary , stretchx, stretchy, stretchz); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - + ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary, stretchx, stretchy, stretchz); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra // // Construct the problem @@ -197,94 +193,91 @@ int main(int argc, char *argv[]) { RCP xpetraMap; std::vector ind; - //Creation of the map where processor 0 gets nothing at the fine level - if( comm->getSize()>1 ) - { - ind.reserve (static_cast( matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 ) + 1 )); - if( mypid !=0 && mypid!=comm->getSize()-1 ) - for(int i = 0; i <= ( static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 ))) - 1; ++i) - ind.emplace_back( (mypid-1) * static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 )) + i ); - - if( mypid==comm->getSize()-1 ) - for(int i = (mypid-1) * static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 )); i != matrixParameters.GetNumGlobalElements(); ++i) - ind.emplace_back( i ); - - ind.shrink_to_fit(); - - Teuchos::ArrayView elementList (ind); - xpetraMap = MapFactory::Build(Xpetra::UseTpetra,matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); - } - else if( comm->getSize()==1 ) - xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + // Creation of the map where processor 0 gets nothing at the fine level + if (comm->getSize() > 1) { + 
ind.reserve(static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1) + 1)); + if (mypid != 0 && mypid != comm->getSize() - 1) + for (int i = 0; i <= (static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1))) - 1; ++i) + ind.emplace_back((mypid - 1) * static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1)) + i); + + if (mypid == comm->getSize() - 1) + for (int i = (mypid - 1) * static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1)); i != matrixParameters.GetNumGlobalElements(); ++i) + ind.emplace_back(i); + + ind.shrink_to_fit(); + + Teuchos::ArrayView elementList(ind); + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); + } else if (comm->getSize() == 1) + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); RCP coordinates; if (problem_type == "ADR1D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR2D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR3D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); - RCP xpetraA = Pr->BuildMatrix(); + RCP xpetraA = Pr->BuildMatrix(); - RCP A = 
MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); + RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); // Construct a multigrid preconditioner - RCP M = MueLu::CreateTpetraPreconditioner( (RCP)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates) ); - - RCP X = rcp(new multivector_type(map,1)); - RCP B = rcp(new multivector_type(map,1)); - -for(int iter = 1; iter <= number_runs; ++iter) -{ - X->putScalar((scalar_type) 0.0); - B->randomize(); - - // - // Set up Krylov solver and iterate. - // - - RCP X_muelu = rcp(new multivector_type(map,1)); - RCP Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); - Problem_muelu->setRightPrec(M); - Problem_muelu->setProblem(); - - RCP belosList = rcp(new Teuchos::ParameterList()); - belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - //belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList->set("Verbosity", Belos::Errors); - belosList->set("Output Frequency", 1); - belosList->set("Output Style", Belos::Brief); - belosList->set("Implicit Residual Scaling", "None"); - RCP solver; - if (krylovSolverType == "cg") - solver = rcp(new belos_pseudocg_manager_type(Problem_muelu, belosList)); - else if (krylovSolverType == "gmres") - solver = rcp(new belos_gmres_manager_type(Problem_muelu, belosList)); - else if (krylovSolverType == "bicgstab") - solver = rcp(new belos_bicgstab_manager_type(Problem_muelu, belosList)); - else - throw std::invalid_argument("bad Krylov solver type"); - - solver->solve(); - int numIterations_muelu = solver->getNumIters(); - - Teuchos::Array::magnitudeType> normVec_muelu(1); - multivector_type residual_muelu(B->getMap(),1); - A->apply(*X_muelu, residual_muelu); - residual_muelu.update(1.0, *B, -1.0); - residual_muelu.norm2(normVec_muelu); - if (mypid == 0) { - std::cout 
<< "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; - std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; - } -} - #include - Teuchos::TimeMonitor::summarize (); + RCP M = MueLu::CreateTpetraPreconditioner((RCP)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates)); + + RCP X = rcp(new multivector_type(map, 1)); + RCP B = rcp(new multivector_type(map, 1)); + + for (int iter = 1; iter <= number_runs; ++iter) { + X->putScalar((scalar_type)0.0); + B->randomize(); + + // + // Set up Krylov solver and iterate. + // + + RCP X_muelu = rcp(new multivector_type(map, 1)); + RCP Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); + Problem_muelu->setRightPrec(M); + Problem_muelu->setProblem(); + + RCP belosList = rcp(new Teuchos::ParameterList()); + belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList->set("Verbosity", Belos::Errors); + belosList->set("Output Frequency", 1); + belosList->set("Output Style", Belos::Brief); + belosList->set("Implicit Residual Scaling", "None"); + RCP solver; + if (krylovSolverType == "cg") + solver = rcp(new belos_pseudocg_manager_type(Problem_muelu, belosList)); + else if (krylovSolverType == "gmres") + solver = rcp(new belos_gmres_manager_type(Problem_muelu, belosList)); + else if (krylovSolverType == "bicgstab") + solver = rcp(new belos_bicgstab_manager_type(Problem_muelu, belosList)); + else + throw std::invalid_argument("bad Krylov solver type"); + + solver->solve(); + int numIterations_muelu = solver->getNumIters(); + + Teuchos::Array::magnitudeType> normVec_muelu(1); + multivector_type residual_muelu(B->getMap(), 1); + A->apply(*X_muelu, residual_muelu); + residual_muelu.update(1.0, *B, -1.0); + residual_muelu.norm2(normVec_muelu); + if (mypid 
== 0) { + std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; + std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; + } + } +#include + Teuchos::TimeMonitor::summarize(); return EXIT_SUCCESS; } diff --git a/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp b/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp index 0b50560d27c8..89a8148d94e4 100644 --- a/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp +++ b/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp @@ -79,7 +79,7 @@ #include #include -//ADR subdirectory +// ADR subdirectory #include "CreateADRMatrix.hpp" #include #include @@ -98,22 +98,21 @@ #include int main(int argc, char *argv[]) { - // Define default types - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; // Convenient typedef's - typedef Tpetra::Operator operator_type; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::Vector vector_type; - typedef Tpetra::MultiVector multivector_type; - typedef Tpetra::Map driver_map_type; + typedef Tpetra::Operator operator_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::Vector vector_type; + typedef Tpetra::MultiVector multivector_type; + typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; - typedef MueLu::Utilities MueLuUtilities; + typedef MueLu::Utilities MueLuUtilities; typedef Belos::LinearProblem linear_problem_type; typedef Belos::SolverManager belos_solver_manager_type; @@ -121,73 +120,71 @@ int main(int argc, char *argv[]) { typedef Belos::BlockGmresSolMgr belos_gmres_manager_type; typedef Belos::BiCGStabSolMgr belos_bicgstab_manager_type; - //MueLu_UseShortNames.hpp wants these typedefs. 
- typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; + // MueLu_UseShortNames.hpp wants these typedefs. + typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; typedef global_ordinal_type GlobalOrdinal; - typedef node_type Node; -# include + typedef node_type Node; +#include - typedef Galeri::Xpetra::Problem GaleriXpetraProblem; - typedef ADR::Xpetra::Problem ADRXpetraProblem; + typedef Galeri::Xpetra::Problem GaleriXpetraProblem; + typedef ADR::Xpetra::Problem ADRXpetraProblem; - using Teuchos::RCP; // reference count pointers - using Teuchos::rcp; // reference count pointers + using Teuchos::RCP; // reference count pointers + using Teuchos::rcp; // reference count pointers // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int mypid = comm->getRank(); -/* - int subCommRank[3]={0,1,2}; - Teuchos::ArrayView arraySubCommRank(subCommRank, 3); - auto subComm = comm->createSubcommunicator(arraySubCommRank); -*/ + RCP > comm = Teuchos::DefaultComm::getComm(); + int mypid = comm->getRank(); + /* + int subCommRank[3]={0,1,2}; + Teuchos::ArrayView arraySubCommRank(subCommRank, 3); + auto subComm = comm->createSubcommunicator(arraySubCommRank); + */ Teuchos::CommandLineProcessor clp(false); // Problem definition - std::string problem_type = "ADR2D"; + std::string problem_type = "ADR2D"; // Parameters - Scalar Lx = 6.0; - Scalar Ly = 6.0; - Scalar Lz = 6.0; + Scalar Lx = 6.0; + Scalar Ly = 6.0; + Scalar Lz = 6.0; global_ordinal_type nx = 50; global_ordinal_type ny = 50; global_ordinal_type nz = 50; int keep_boundary = 0; - Scalar stretchx = (Scalar) Lx/nx; - Scalar stretchy = (Scalar) Ly/ny; - Scalar stretchz = (Scalar) Lz/nz; - - - ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary , stretchx, stretchy, stretchz); // manage parameters of the test case - Xpetra::Parameters 
xpetraParameters(clp); // manage parameters of xpetra + Scalar stretchx = (Scalar)Lx / nx; + Scalar stretchy = (Scalar)Ly / ny; + Scalar stretchz = (Scalar)Lz / nz; + ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary, stretchx, stretchy, stretchz); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - global_ordinal_type maxIts = 10000; - scalar_type tol = 1e-10; - std::string solverOptionsFile = "dd.xml"; - std::string krylovSolverType = "bicgstab"; + global_ordinal_type maxIts = 10000; + scalar_type tol = 1e-10; + std::string solverOptionsFile = "dd.xml"; + std::string krylovSolverType = "bicgstab"; - clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); - clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - clp.setOption("tol", &tol, "tolerance for Krylov solver"); - clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); + clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); + clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); + clp.setOption("tol", &tol, "tolerance for Krylov solver"); + clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - + if (xpetraParameters.GetLib() == Xpetra::UseEpetra) { throw std::invalid_argument("This example only supports Tpetra."); } @@ -200,60 +197,55 @@ int main(int argc, char *argv[]) { // global_ordinal_type indexBase = 0; - RCP 
xpetraMap; - std::vector indices; - - //Creation of the map where processor 0 gets nothing at the fine level - if( comm->getSize()>1 ) - { - indices.reserve (static_cast( matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 ) + 1 )); - if( mypid !=0 && mypid!=comm->getSize()-1 ) - for(int i = 0; i <= ( static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 ))) - 1; ++i) - indices.emplace_back( (mypid-1) * static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 )) + i ); - - if( mypid==comm->getSize()-1 ) - for(int i = (mypid-1) * static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 )); i != matrixParameters.GetNumGlobalElements(); ++i)\ - indices.emplace_back( i ); - - indices.shrink_to_fit(); - - Teuchos::ArrayView elementList (indices); - xpetraMap = MapFactory::Build(Xpetra::UseTpetra,matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); - } - else if( comm->getSize()==1 ) - { - xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); - } - - //============================================================================================================ + RCP xpetraMap; + std::vector indices; + + // Creation of the map where processor 0 gets nothing at the fine level + if (comm->getSize() > 1) { + indices.reserve(static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1) + 1)); + if (mypid != 0 && mypid != comm->getSize() - 1) + for (int i = 0; i <= (static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1))) - 1; ++i) + indices.emplace_back((mypid - 1) * static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1)) + i); + + if (mypid == comm->getSize() - 1) + for (int i = (mypid - 1) * static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1)); i != matrixParameters.GetNumGlobalElements(); ++i) + indices.emplace_back(i); + + indices.shrink_to_fit(); + + 
Teuchos::ArrayView elementList(indices); + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); + } else if (comm->getSize() == 1) { + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + } + + //============================================================================================================ RCP coordinates; - //RCP xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); - //RCP xpetraMap = Galeri::Xpetra::CreateMap(Xpetra::UseTpetra, "Cartesian2D", comm, matrixParameters.GetParameterList()); + // RCP xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + // RCP xpetraMap = Galeri::Xpetra::CreateMap(Xpetra::UseTpetra, "Cartesian2D", comm, matrixParameters.GetParameterList()); - std::cout<<"Before coordinates"<("2D", xpetraMap, matrixParameters.GetParameterList()); - std::cout<<"After coordinates"<("2D", xpetraMap, matrixParameters.GetParameterList()); + std::cout << "After coordinates" << std::endl; RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); - RCP xpetraA = Pr->BuildMatrix(); + RCP xpetraA = Pr->BuildMatrix(); - RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); - RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); + RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); + RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); // // Construct a multigrid preconditioner // - RCP M = MueLu::CreateTpetraPreconditioner( (RCP)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates) ); - - + RCP M = MueLu::CreateTpetraPreconditioner((RCP)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates)); - RCP X = rcp(new multivector_type(map,1)); - RCP B = rcp(new multivector_type(map,1)); + RCP X = rcp(new multivector_type(map, 1)); + RCP B = rcp(new 
multivector_type(map, 1)); - X->putScalar((scalar_type) 0.0); + X->putScalar((scalar_type)0.0); B->randomize(); RCP Problem = rcp(new linear_problem_type(A, X, B)); @@ -263,13 +255,13 @@ int main(int argc, char *argv[]) { // // Set up Krylov solver and iterate. // - + RCP belosList = rcp(new ParameterList()); - belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList->set("Output Frequency", 1); - belosList->set("Output Style", Belos::Brief); + belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList->set("Output Frequency", 1); + belosList->set("Output Style", Belos::Brief); belosList->set("Implicit Residual Scaling", "None"); RCP solver; if (krylovSolverType == "cg") @@ -285,8 +277,8 @@ int main(int argc, char *argv[]) { int numIterations = solver->getNumIters(); Teuchos::Array::magnitudeType> normVec(1); - multivector_type Ax(B->getMap(),1); - multivector_type residual(B->getMap(),1); + multivector_type Ax(B->getMap(), 1); + multivector_type residual(B->getMap(), 1); A->apply(*X, residual); residual.update(1.0, *B, -1.0); residual.norm2(normVec); @@ -294,6 +286,6 @@ int main(int argc, char *argv[]) { std::cout << "number of iterations = " << numIterations << std::endl; std::cout << "||Residual|| = " << normVec[0] << std::endl; } - + return EXIT_SUCCESS; } diff --git a/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp b/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp index 2b2e3148ee3d..50fef96d0c62 100644 --- a/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp +++ 
b/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp @@ -1,255 +1,234 @@ #include "Smooth_Prolongation.hpp" -namespace MueLu{ +namespace MueLu { -AdditiveVariant::AdditiveVariant( RCP A, RCP coords, DomainPartitioning domain) -{ +AdditiveVariant::AdditiveVariant(RCP A, RCP coords, DomainPartitioning domain) { + domain_ = domain; - domain_ = domain; + TEUCHOS_TEST_FOR_EXCEPT(!(GlobalComm_.is_null())); + GlobalComm_ = A->getComm(); + TEUCHOS_TEST_FOR_EXCEPT(GlobalComm_.is_null()); - TEUCHOS_TEST_FOR_EXCEPT( !( GlobalComm_.is_null() ) ); - GlobalComm_ = A->getComm(); - TEUCHOS_TEST_FOR_EXCEPT( GlobalComm_.is_null() ); + TEUCHOS_TEST_FOR_EXCEPT(!(coords_.is_null())); + coords_ = coords; + TEUCHOS_TEST_FOR_EXCEPT((coords_.is_null())); - TEUCHOS_TEST_FOR_EXCEPT( !( coords_.is_null() ) ); - coords_ = coords; - TEUCHOS_TEST_FOR_EXCEPT( ( coords_.is_null() ) ); + TEUCHOS_TEST_FOR_EXCEPT(!(DomainMap_.is_null())); + DomainMap_ = A->getDomainMap(); + TEUCHOS_TEST_FOR_EXCEPT(DomainMap_.is_null()); - TEUCHOS_TEST_FOR_EXCEPT( !( DomainMap_.is_null() ) ); - DomainMap_ = A->getDomainMap(); - TEUCHOS_TEST_FOR_EXCEPT( DomainMap_.is_null() ); + TEUCHOS_TEST_FOR_EXCEPT(!(RangeMap_.is_null())); + RangeMap_ = A->getRangeMap(); + TEUCHOS_TEST_FOR_EXCEPT(RangeMap_.is_null()); - TEUCHOS_TEST_FOR_EXCEPT( !( RangeMap_.is_null() ) ); - RangeMap_ = A->getRangeMap(); - TEUCHOS_TEST_FOR_EXCEPT( RangeMap_.is_null() ); - - TEUCHOS_TEST_FOR_EXCEPT( !( B_fine_.is_null() ) || !( B_coarse_.is_null() ) ); - AdditiveFineSmoother( A ); - AdditiveCoarseSolver( A ); - TEUCHOS_TEST_FOR_EXCEPT( B_fine_.is_null() || B_coarse_.is_null() ); - - /*TEUCHOS_TEST_FOR_EXCEPT( !( B_DD_.is_null() ) || !( B_coarse_.is_null() ) ); - AdditiveFineSmoother( A ); - AdditiveCoarseSolver( A ); - TEUCHOS_TEST_FOR_EXCEPT( B_DD_.is_null() || B_coarse_.is_null() ); - TEUCHOS_TEST_FOR_EXCEPT( !B_DD_->isInitialized() || !B_DD_->isComputed() );*/ + TEUCHOS_TEST_FOR_EXCEPT(!(B_fine_.is_null()) || !(B_coarse_.is_null())); + 
AdditiveFineSmoother(A); + AdditiveCoarseSolver(A); + TEUCHOS_TEST_FOR_EXCEPT(B_fine_.is_null() || B_coarse_.is_null()); + /*TEUCHOS_TEST_FOR_EXCEPT( !( B_DD_.is_null() ) || !( B_coarse_.is_null() ) ); + AdditiveFineSmoother( A ); + AdditiveCoarseSolver( A ); + TEUCHOS_TEST_FOR_EXCEPT( B_DD_.is_null() || B_coarse_.is_null() ); + TEUCHOS_TEST_FOR_EXCEPT( !B_DD_->isInitialized() || !B_DD_->isComputed() );*/ } - -void -AdditiveVariant::AdditiveFineSmoother( RCP A ) -{ - - //Creation of the MueLu list for the DD preconditioner - RCP dd_list = rcp(new Teuchos::ParameterList()); - dd_list->setName("MueLu"); - dd_list->set("verbosity", "low"); - dd_list->set("number of equations", 1); - dd_list->set("max levels", 1); - dd_list->set("coarse: type", "SCHWARZ"); //FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL - - ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); - dd_smooth_sublist.set("schwarz: overlap level", 0); - dd_smooth_sublist.set("schwarz: combine mode", "Zero"); - dd_smooth_sublist.set("subdomain solver name", "RILUK"); - - ParameterList& coarse_subdomain_solver = dd_smooth_sublist.sublist("subdomain solver parameters"); - coarse_subdomain_solver.set("fact: iluk level-of-fill", 3); - coarse_subdomain_solver.set("fact: absolute threshold", 0.); - coarse_subdomain_solver.set("fact: relative threshold", 1.); - coarse_subdomain_solver.set("fact: relax value", 0.); - - B_fine_ = CreateTpetraPreconditioner( (RCP)A, *dd_list ); - - /*B_DD_ = rcp( new Ifpack2::AdditiveSchwarz< row_matrix_type, precond_type >(A, 0) ); - B_DD_->initialize(); - B_DD_->compute();*/ - +void AdditiveVariant::AdditiveFineSmoother(RCP A) { + // Creation of the MueLu list for the DD preconditioner + RCP dd_list = rcp(new Teuchos::ParameterList()); + dd_list->setName("MueLu"); + dd_list->set("verbosity", "low"); + dd_list->set("number of equations", 1); + dd_list->set("max levels", 1); + dd_list->set("coarse: type", "SCHWARZ"); // FOR A ONE 
LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL + + ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); + dd_smooth_sublist.set("schwarz: overlap level", 0); + dd_smooth_sublist.set("schwarz: combine mode", "Zero"); + dd_smooth_sublist.set("subdomain solver name", "RILUK"); + + ParameterList& coarse_subdomain_solver = dd_smooth_sublist.sublist("subdomain solver parameters"); + coarse_subdomain_solver.set("fact: iluk level-of-fill", 3); + coarse_subdomain_solver.set("fact: absolute threshold", 0.); + coarse_subdomain_solver.set("fact: relative threshold", 1.); + coarse_subdomain_solver.set("fact: relax value", 0.); + + B_fine_ = CreateTpetraPreconditioner((RCP)A, *dd_list); + + /*B_DD_ = rcp( new Ifpack2::AdditiveSchwarz< row_matrix_type, precond_type >(A, 0) ); + B_DD_->initialize(); + B_DD_->compute();*/ } +void AdditiveVariant::AdditiveCoarseSolver(RCP A) { + // Creation of the MueLu list for the DD preconditioner + RCP coarse_list = rcp(new Teuchos::ParameterList()); + coarse_list->setName("MueLu"); + coarse_list->set("verbosity", "low"); + coarse_list->set("number of equations", 1); + coarse_list->set("max levels", 2); + coarse_list->set("multigrid algorithm", "unsmoothed"); + coarse_list->set("aggregation: type", "brick"); + coarse_list->set("aggregation: brick x size", domain_.bricksize_x); + coarse_list->set("aggregation: brick y size", domain_.bricksize_y); + coarse_list->set("aggregation: brick z size", domain_.bricksize_z); + coarse_list->set("aggregation: drop scheme", "classical"); + coarse_list->set("smoother: pre or post", "none"); + coarse_list->set("repartition: enable", true); + coarse_list->set("repartition: partitioner", "zoltan"); + coarse_list->set("repartition: start level", 1); + coarse_list->set("repartition: min rows per proc", static_cast(A->getGlobalNumRows())); + coarse_list->set("repartition: max imbalance", 1.2); + coarse_list->set("repartition: remap parts", false); + 
coarse_list->set("coarse: type", "SCHWARZ"); -void -AdditiveVariant::AdditiveCoarseSolver( RCP A ) -{ - //Creation of the MueLu list for the DD preconditioner - RCP coarse_list = rcp(new Teuchos::ParameterList()); - coarse_list->setName("MueLu"); - coarse_list->set("verbosity", "low"); - coarse_list->set("number of equations", 1); - coarse_list->set("max levels", 2); - coarse_list->set("multigrid algorithm", "unsmoothed"); - coarse_list->set("aggregation: type", "brick"); - coarse_list->set("aggregation: brick x size", domain_.bricksize_x); - coarse_list->set("aggregation: brick y size", domain_.bricksize_y); - coarse_list->set("aggregation: brick z size", domain_.bricksize_z); - coarse_list->set("aggregation: drop scheme", "classical"); - coarse_list->set("smoother: pre or post", "none"); - coarse_list->set("repartition: enable", true); - coarse_list->set("repartition: partitioner", "zoltan"); - coarse_list->set("repartition: start level", 1); - coarse_list->set("repartition: min rows per proc", static_cast(A->getGlobalNumRows()) ); - coarse_list->set("repartition: max imbalance", 1.2); - coarse_list->set("repartition: remap parts", false); - coarse_list->set("coarse: type", "SCHWARZ"); - - //Creation of Sublist for smoother - ParameterList& coarse_smooth_sublist = coarse_list->sublist("coarse: params"); - coarse_smooth_sublist.set("schwarz: overlap level", 0); - coarse_smooth_sublist.set("schwarz: combine mode", "Zero"); - coarse_smooth_sublist.set("subdomain solver name", "RILUK"); - - //Creation of the sublist for the subdomain solver - ParameterList& coarse_subdomain_solver = coarse_smooth_sublist.sublist("subdomain solver parameters"); - coarse_subdomain_solver.set("fact: iluk level-of-fill", 3); - coarse_subdomain_solver.set("fact: absolute threshold", 0.); - coarse_subdomain_solver.set("fact: relative threshold", 1.); - coarse_subdomain_solver.set("fact: relax value", 0.); - - /*ParameterList& coarse_file_sublist = coarse_list->sublist("export data"); - 
coarse_file_sublist.set("A", "{0}"); - coarse_file_sublist.set("P", "{0,1}"); - coarse_file_sublist.set("R", "{1}");*/ - - //Manual set up of the prolongation and restriction - MueLu::ParameterListInterpreter mueLuFactory( *coarse_list ); - RCP > H = mueLuFactory.CreateHierarchy(); - H->setVerbLevel(Teuchos::VERB_HIGH); - RCP mueluA = MueLu::TpetraCrs_To_XpetraMatrix(A); - - RCP > coords = Xpetra::toXpetra( coords_ ); - - H->GetLevel(0)->Set("A", mueluA); - H->GetLevel(0)->Set("Coordinates", coords); - - // Multigrid setup phase - mueLuFactory.SetupHierarchy(*H); - - RCP L = H->GetLevel(1); - - RCP> prolong, restr; - - if (L->IsAvailable("P")) - prolong = L->template Get< RCP> >("P"); - - if (L->IsAvailable("R")) - restr = L->template Get< RCP> >("R"); - - RCP tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong); - RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); - - int mypid = GlobalComm_->getRank(); - GlobalComm_->barrier(); - - // We have to transform P into a condensed multivector - RCP identity_shrunk = rcp( new multivector_type(tpetra_prolong->getDomainMap(), 3) ); - Teuchos::ArrayView myIdentityGlobalElements = tpetra_prolong->getDomainMap()->getLocalElementList(); - typedef typename Teuchos::ArrayView::const_iterator iter_type; - - int my_color = (mypid-1)%3; - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(my_color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - localMV[aux] = 1.0; - } - - RCP P_shrunk = rcp( new multivector_type(tpetra_prolong->getRangeMap(), 3) ); - tpetra_prolong->apply(*identity_shrunk, *P_shrunk); - RCP AP_shrunk = rcp( new multivector_type(A->getRangeMap(), 3) ); - A->apply(*P_shrunk, *AP_shrunk); - - TEUCHOS_TEST_FOR_EXCEPT( B_fine_.is_null() ); - - 
//======================================================================================================== - - // CREATION OF BAP - - RCP BAP_multivector = rcp(new multivector_type(B_fine_->getRangeMap(),AP_shrunk->getNumVectors())); - RCP BAP_shrunk = rcp(new multivector_type(B_fine_->getRangeMap(),AP_shrunk->getNumVectors())); - B_fine_->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits< scalar_type >::one(), Teuchos::ScalarTraits< scalar_type >::zero()); - - //I just need this to generate the right colMap to populate BAP - RCP AP = rcp( new crs_matrix_type( tpetra_prolong->getRowMap(), tpetra_prolong->getColMap(), tpetra_prolong->getGlobalNumRows() ) ); - Tpetra::MatrixMatrix::Multiply(*A, false, *tpetra_prolong, false, *AP, true); - - GlobalComm_->barrier(); - - RCP BAP = rcp( new crs_matrix_type( tpetra_prolong->getRowMap(), AP->getColMap(), tpetra_prolong->getGlobalNumCols() ) ); - Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); - - for(int color = 0; color<3; ++color) - { - Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); - - for (iter_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) - { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = BAP->getRowMap()->getLocalElement (i_local); - - std::vector BAP_inds; - std::vector BAP_vals; - - local_ordinal_type aux2; + // Creation of Sublist for smoother + ParameterList& coarse_smooth_sublist = coarse_list->sublist("coarse: params"); + coarse_smooth_sublist.set("schwarz: overlap level", 0); + coarse_smooth_sublist.set("schwarz: combine mode", "Zero"); + coarse_smooth_sublist.set("subdomain solver name", "RILUK"); + + // Creation of the sublist for the subdomain solver + ParameterList& coarse_subdomain_solver = coarse_smooth_sublist.sublist("subdomain solver parameters"); + coarse_subdomain_solver.set("fact: iluk level-of-fill", 3); + coarse_subdomain_solver.set("fact: absolute threshold", 0.); + 
coarse_subdomain_solver.set("fact: relative threshold", 1.); + coarse_subdomain_solver.set("fact: relax value", 0.); + + /*ParameterList& coarse_file_sublist = coarse_list->sublist("export data"); +coarse_file_sublist.set("A", "{0}"); +coarse_file_sublist.set("P", "{0,1}"); +coarse_file_sublist.set("R", "{1}");*/ + + // Manual set up of the prolongation and restriction + MueLu::ParameterListInterpreter mueLuFactory(*coarse_list); + RCP> H = mueLuFactory.CreateHierarchy(); + H->setVerbLevel(Teuchos::VERB_HIGH); + RCP mueluA = MueLu::TpetraCrs_To_XpetraMatrix(A); + + RCP> coords = Xpetra::toXpetra(coords_); + + H->GetLevel(0)->Set("A", mueluA); + H->GetLevel(0)->Set("Coordinates", coords); + + // Multigrid setup phase + mueLuFactory.SetupHierarchy(*H); + + RCP L = H->GetLevel(1); + + RCP> prolong, restr; + + if (L->IsAvailable("P")) + prolong = L->template Get>>("P"); + + if (L->IsAvailable("R")) + restr = L->template Get>>("R"); + + RCP tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong); + RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); + + int mypid = GlobalComm_->getRank(); + GlobalComm_->barrier(); + + // We have to transform P into a condensed multivector + RCP identity_shrunk = rcp(new multivector_type(tpetra_prolong->getDomainMap(), 3)); + Teuchos::ArrayView myIdentityGlobalElements = tpetra_prolong->getDomainMap()->getLocalElementList(); + typedef typename Teuchos::ArrayView::const_iterator iter_type; + + int my_color = (mypid - 1) % 3; + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(my_color); + + for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + localMV[aux] = 1.0; + } + + RCP P_shrunk = rcp(new multivector_type(tpetra_prolong->getRangeMap(), 3)); + tpetra_prolong->apply(*identity_shrunk, *P_shrunk); + RCP AP_shrunk = rcp(new 
multivector_type(A->getRangeMap(), 3)); + A->apply(*P_shrunk, *AP_shrunk); + + TEUCHOS_TEST_FOR_EXCEPT(B_fine_.is_null()); + + //======================================================================================================== + + // CREATION OF BAP + + RCP BAP_multivector = rcp(new multivector_type(B_fine_->getRangeMap(), AP_shrunk->getNumVectors())); + RCP BAP_shrunk = rcp(new multivector_type(B_fine_->getRangeMap(), AP_shrunk->getNumVectors())); + B_fine_->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits::one(), Teuchos::ScalarTraits::zero()); + + // I just need this to generate the right colMap to populate BAP + RCP AP = rcp(new crs_matrix_type(tpetra_prolong->getRowMap(), tpetra_prolong->getColMap(), tpetra_prolong->getGlobalNumRows())); + Tpetra::MatrixMatrix::Multiply(*A, false, *tpetra_prolong, false, *AP, true); + + GlobalComm_->barrier(); + + RCP BAP = rcp(new crs_matrix_type(tpetra_prolong->getRowMap(), AP->getColMap(), tpetra_prolong->getGlobalNumCols())); + Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); - if( (mypid-1)%3==color && (mypid-1)>= 0 && (mypid-1)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid-1); - else if( (mypid-2)%3==color && (mypid-2)>= 0 && (mypid-2)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid-2); - else if( (mypid)%3==color && (mypid)>= 0 && (mypid)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid); + for (int color = 0; color < 3; ++color) { + Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); - if(aux2>=0) - { - BAP_inds.emplace_back(aux2); - BAP_vals.emplace_back(localBAP[aux]); - BAP->insertLocalValues(aux, BAP_inds, BAP_vals); - } - } - - } - BAP->fillComplete( tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap() ); + for (iter_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = 
BAP->getRowMap()->getLocalElement(i_local); - //============================================================================================================= + std::vector BAP_inds; + std::vector BAP_vals; - RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, *tpetra_prolong, -1.0, false, *BAP); - RCP mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); + local_ordinal_type aux2; - H->GetLevel(1)->Set("Pbar", mueluPbar); + if ((mypid - 1) % 3 == color && (mypid - 1) >= 0 && (mypid - 1) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid - 1); + else if ((mypid - 2) % 3 == color && (mypid - 2) >= 0 && (mypid - 2) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid - 2); + else if ((mypid) % 3 == color && (mypid) >= 0 && (mypid) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid); - H->IsPreconditioner(true); - B_coarse_ = rcp(new muelu_tpetra_operator_type(H)); + if (aux2 >= 0) { + BAP_inds.emplace_back(aux2); + BAP_vals.emplace_back(localBAP[aux]); + BAP->insertLocalValues(aux, BAP_inds, BAP_vals); + } + } + } + BAP->fillComplete(tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap()); -} + //============================================================================================================= + RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, *tpetra_prolong, -1.0, false, *BAP); + RCP mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); -void AdditiveVariant::apply( const multivector_type &r, multivector_type &Pr, Teuchos::ETransp mode = Teuchos::NO_TRANS, scalar_type alpha = Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta = Teuchos::ScalarTraits< scalar_type >::zero() ) const -{ + H->GetLevel(1)->Set("Pbar", mueluPbar); + H->IsPreconditioner(true); + B_coarse_ = rcp(new muelu_tpetra_operator_type(H)); +} - Tpetra::Export Export_fine1(r.getMap(), DomainMap_); - Tpetra::Export Export_fine2(RangeMap_, r.getMap()); +void 
AdditiveVariant::apply(const multivector_type& r, multivector_type& Pr, Teuchos::ETransp mode = Teuchos::NO_TRANS, scalar_type alpha = Teuchos::ScalarTraits::one(), scalar_type beta = Teuchos::ScalarTraits::zero()) const { + Tpetra::Export Export_fine1(r.getMap(), DomainMap_); + Tpetra::Export Export_fine2(RangeMap_, r.getMap()); - multivector_type r_fine(DomainMap_, 1); - r_fine.doImport(r, Export_fine1, Tpetra::INSERT); - multivector_type B_fine_Pr(RangeMap_,1); - B_fine_->apply(r_fine, B_fine_Pr, mode, alpha, beta); - multivector_type B_Pr1(r.getMap(),1); - B_Pr1.doImport(B_fine_Pr, Export_fine2, Tpetra::INSERT); + multivector_type r_fine(DomainMap_, 1); + r_fine.doImport(r, Export_fine1, Tpetra::INSERT); + multivector_type B_fine_Pr(RangeMap_, 1); + B_fine_->apply(r_fine, B_fine_Pr, mode, alpha, beta); + multivector_type B_Pr1(r.getMap(), 1); + B_Pr1.doImport(B_fine_Pr, Export_fine2, Tpetra::INSERT); - multivector_type r_coarse(DomainMap_, 1); - r_coarse.doImport(r, Export_fine1, Tpetra::INSERT); - multivector_type B_coarse_Pr(RangeMap_,1); - B_coarse_->apply(r_coarse, B_coarse_Pr, mode, alpha, beta); - multivector_type B_Pr2(r.getMap(),1); - B_Pr2.doImport(B_coarse_Pr, Export_fine2, Tpetra::INSERT); + multivector_type r_coarse(DomainMap_, 1); + r_coarse.doImport(r, Export_fine1, Tpetra::INSERT); + multivector_type B_coarse_Pr(RangeMap_, 1); + B_coarse_->apply(r_coarse, B_coarse_Pr, mode, alpha, beta); + multivector_type B_Pr2(r.getMap(), 1); + B_Pr2.doImport(B_coarse_Pr, Export_fine2, Tpetra::INSERT); - multivector_type B_Pr_sum(r.getMap(),1); - B_Pr_sum.update(1.0, B_Pr1, 1.0, B_Pr2, 0.0);//Careful to set the correct coefficients!!! + multivector_type B_Pr_sum(r.getMap(), 1); + B_Pr_sum.update(1.0, B_Pr1, 1.0, B_Pr2, 0.0); // Careful to set the correct coefficients!!! 
- Pr = B_Pr_sum; + Pr = B_Pr_sum; } - -} +} // namespace MueLu diff --git a/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.hpp b/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.hpp index de1ec4059d26..a2e478819805 100644 --- a/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.hpp +++ b/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.hpp @@ -17,81 +17,72 @@ #include -namespace MueLu -{ +namespace MueLu { // Define default types -typedef double scalar_type; -typedef int local_ordinal_type; -typedef int global_ordinal_type; +typedef double scalar_type; +typedef int local_ordinal_type; +typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; -typedef Tpetra::Operator operator_type; +typedef Tpetra::Operator operator_type; -typedef Tpetra::CrsMatrix crs_matrix_type; -typedef Tpetra::RowMatrix row_matrix_type; -typedef Tpetra::MultiVector multivector_type; -typedef Tpetra::Map driver_map_type; +typedef Tpetra::CrsMatrix crs_matrix_type; +typedef Tpetra::RowMatrix row_matrix_type; +typedef Tpetra::MultiVector multivector_type; +typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; -typedef MueLu::Utilities MueLuUtilities; +typedef MueLu::Utilities MueLuUtilities; -typedef Xpetra::Matrix xpetra_matrix; +typedef Xpetra::Matrix xpetra_matrix; typedef Ifpack2::Preconditioner precond_type; +struct DomainPartitioning { + global_ordinal_type nx = 0; + global_ordinal_type ny = 0; + global_ordinal_type nz = 0; -struct DomainPartitioning{ - - global_ordinal_type nx = 0; - global_ordinal_type ny = 0; - global_ordinal_type nz = 0; - - global_ordinal_type bricksize_x = 0; - global_ordinal_type bricksize_y = 0; - global_ordinal_type bricksize_z = 0; - + global_ordinal_type bricksize_x = 0; + global_ordinal_type bricksize_y = 0; + global_ordinal_type bricksize_z = 0; }; +class AdditiveVariant : public operator_type { + public: + AdditiveVariant(RCP, RCP, 
DomainPartitioning); -class AdditiveVariant: public operator_type{ - - public: - - AdditiveVariant( RCP, RCP, DomainPartitioning ); - - void apply( const multivector_type &, multivector_type &, Teuchos::ETransp, scalar_type, scalar_type ) const; - - bool hasTransposeApply()const{return false;} - - RCP< const driver_map_type > getDomainMap() const{return DomainMap_;}; + void apply(const multivector_type &, multivector_type &, Teuchos::ETransp, scalar_type, scalar_type) const; - RCP< const driver_map_type > getRangeMap() const{return RangeMap_;}; + bool hasTransposeApply() const { return false; } - private: + RCP getDomainMap() const { return DomainMap_; }; - DomainPartitioning domain_; + RCP getRangeMap() const { return RangeMap_; }; - RCP< const Teuchos::Comm > GlobalComm_; + private: + DomainPartitioning domain_; - RCP< multivector_type > coords_; + RCP > GlobalComm_; - RCP< const driver_map_type > DomainMap_; + RCP coords_; - RCP< const driver_map_type > RangeMap_; + RCP DomainMap_; - //MueLu Preconditioner to store the smoother at the fine level - RCP B_fine_ = Teuchos::null; + RCP RangeMap_; - //RCP > B_DD_; + // MueLu Preconditioner to store the smoother at the fine level + RCP B_fine_ = Teuchos::null; - //MueLu Preconditioner to store - RCP B_coarse_ = Teuchos::null; + // RCP > B_DD_; - void AdditiveFineSmoother( RCP ); + // MueLu Preconditioner to store + RCP B_coarse_ = Teuchos::null; - void AdditiveCoarseSolver( RCP ); + void AdditiveFineSmoother(RCP); + void AdditiveCoarseSolver(RCP); }; -} +} // namespace MueLu diff --git a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp index 3cc47a5d7616..edd37f3e5c90 100644 --- a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp +++ b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp @@ -77,7 +77,7 @@ extern clock_t Timers_Max[4]; #include #include -//ADR subdirectory +// ADR subdirectory #include 
"CreateADRMatrix.hpp" #include #include @@ -88,24 +88,23 @@ extern clock_t Timers_Max[4]; #include "BAP.hpp" #include "CreateBrickMap.hpp" -int main(int argc, char *argv[]) { - +int main(int argc, char* argv[]) { // Define default types - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; // Convenient typedef's - typedef Tpetra::Operator operator_type; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::RowMatrix row_matrix_type; - typedef Tpetra::Vector vector_type; - typedef Tpetra::MultiVector multivector_type; - typedef Tpetra::Map driver_map_type; + typedef Tpetra::Operator operator_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::Vector vector_type; + typedef Tpetra::MultiVector multivector_type; + typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; - typedef MueLu::Utilities MueLuUtilities; + typedef MueLu::Utilities MueLuUtilities; typedef Belos::LinearProblem linear_problem_type; typedef Belos::SolverManager belos_solver_manager_type; @@ -113,50 +112,50 @@ int main(int argc, char *argv[]) { typedef Belos::BlockGmresSolMgr belos_gmres_manager_type; typedef Belos::BiCGStabSolMgr belos_bicgstab_manager_type; - typedef Ifpack2::Preconditioner precond_type; + typedef Ifpack2::Preconditioner precond_type; - //MueLu_UseShortNames.hpp wants these typedefs. - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; + // MueLu_UseShortNames.hpp wants these typedefs. 
+ typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; typedef global_ordinal_type GlobalOrdinal; - typedef node_type Node; -# include + typedef node_type Node; +#include - typedef Galeri::Xpetra::Problem GaleriXpetraProblem; - typedef ADR::Xpetra::Problem ADRXpetraProblem; + typedef Galeri::Xpetra::Problem GaleriXpetraProblem; + typedef ADR::Xpetra::Problem ADRXpetraProblem; - using Teuchos::RCP; // reference count pointers - using Teuchos::rcp; // reference count pointers + using Teuchos::RCP; // reference count pointers + using Teuchos::rcp; // reference count pointers // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int mypid = comm->getRank(); -/* - int subCommRank[3]={0,1,2}; - Teuchos::ArrayView arraySubCommRank(subCommRank, 3); - auto subComm = comm->createSubcommunicator(arraySubCommRank); -*/ + RCP> comm = Teuchos::DefaultComm::getComm(); + int mypid = comm->getRank(); + /* + int subCommRank[3]={0,1,2}; + Teuchos::ArrayView arraySubCommRank(subCommRank, 3); + auto subComm = comm->createSubcommunicator(arraySubCommRank); + */ Teuchos::CommandLineProcessor clp(false); - global_ordinal_type maxIts = 10000; - scalar_type tol = 1e-10; - std::string solverOptionsFile = "final_parser.xml"; - std::string krylovSolverType = "bicgstab"; + global_ordinal_type maxIts = 10000; + scalar_type tol = 1e-10; + std::string solverOptionsFile = "final_parser.xml"; + std::string krylovSolverType = "bicgstab"; - clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); - clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - clp.setOption("tol", &tol, "tolerance for Krylov solver"); - clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); + clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); + clp.setOption("maxits", &maxIts, 
"maximum number of Krylov iterations"); + clp.setOption("tol", &tol, "tolerance for Krylov solver"); + clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Teuchos::ParameterList xmlParams; @@ -167,7 +166,7 @@ int main(int argc, char *argv[]) { problemParams = xmlParams.sublist(static_cast("Problem")); // Problem definition - std::string problem_type = problemParams.get(static_cast("problem type")); + std::string problem_type = problemParams.get(static_cast("problem type")); // Parameters @@ -184,14 +183,13 @@ int main(int argc, char *argv[]) { MueLu::DomainPartitioning domain; int keep_boundary = 0; - Scalar stretchx = (Scalar) Lx/nx; - Scalar stretchy = (Scalar) Ly/ny; - Scalar stretchz = (Scalar) Lz/nz; + Scalar stretchx = (Scalar)Lx / nx; + Scalar stretchy = (Scalar)Ly / ny; + Scalar stretchz = (Scalar)Lz / nz; + ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary, stretchx, stretchy, stretchz); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary , stretchx, stretchy, stretchz); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - if (xpetraParameters.GetLib() == Xpetra::UseEpetra) { throw std::invalid_argument("This example only supports Tpetra."); } @@ -204,59 +202,56 @@ int main(int argc, char *argv[]) { RCP xpetraMap; std::vector ind; - //BRICK SIZE + // 
BRICK SIZE int brick_sizex = mueluParams.get(static_cast("aggregation: brick x size")); int brick_sizey = mueluParams.get(static_cast("aggregation: brick y size")); int brick_sizez = mueluParams.get(static_cast("aggregation: brick z size")); - //Creation of the map where processor 0 gets nothing at the fine level - if( comm->getSize()>1 ) - { - if(problem_type == "ADR1D") - createBrickMap1D(matrixParameters.GetNumGlobalElements(), ind, comm ); - - else if(problem_type == "ADR2D") - createBrickMap2D( nx, brick_sizex, brick_sizey, ind, comm ); + // Creation of the map where processor 0 gets nothing at the fine level + if (comm->getSize() > 1) { + if (problem_type == "ADR1D") + createBrickMap1D(matrixParameters.GetNumGlobalElements(), ind, comm); - else if(problem_type == "ADR3D") - createBrickMap3D( nx, ny, brick_sizex, brick_sizey, brick_sizez, ind, comm ); + else if (problem_type == "ADR2D") + createBrickMap2D(nx, brick_sizex, brick_sizey, ind, comm); - ind.shrink_to_fit(); + else if (problem_type == "ADR3D") + createBrickMap3D(nx, ny, brick_sizex, brick_sizey, brick_sizez, ind, comm); - Teuchos::ArrayView elementList (ind); - xpetraMap = MapFactory::Build(Xpetra::UseTpetra,matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); - } - else if( comm->getSize()==1 ) - xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + ind.shrink_to_fit(); + Teuchos::ArrayView elementList(ind); + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); + } else if (comm->getSize() == 1) + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); RCP coordinates; if (problem_type == "ADR1D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, 
matrixParameters.GetParameterList()); else if (problem_type == "ADR2D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR3D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); - RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); - RCP xpetraA = Pr->BuildMatrix(); + RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); + RCP xpetraA = Pr->BuildMatrix(); - RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); + RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); - // =================================================== - // Domain Decomposition Preconditioner - // =================================== + // =================================================== + // Domain Decomposition Preconditioner + // =================================== - //Creation of the MueLu list for the DD preconditioner + // Creation of the MueLu list for the DD preconditioner RCP dd_list = rcp(new Teuchos::ParameterList()); dd_list->setName("MueLu"); dd_list->set("verbosity", "low"); dd_list->set("number of equations", 1); dd_list->set("max levels", 1); - dd_list->set("coarse: type", "SCHWARZ"); //FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL - + dd_list->set("coarse: type", "SCHWARZ"); // FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL + Teuchos::ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); 
dd_smooth_sublist.set("schwarz: overlap level", 0); dd_smooth_sublist.set("schwarz: combine mode", "Zero"); @@ -268,152 +263,137 @@ int main(int argc, char *argv[]) { coarse_subdomain_solver.set("fact: relative threshold", 1.); coarse_subdomain_solver.set("fact: relax value", 0.); - RCP B_DD = MueLu::CreateTpetraPreconditioner( (RCP)A, *dd_list ); + RCP B_DD = MueLu::CreateTpetraPreconditioner((RCP)A, *dd_list); - // =================================================== - // Multi Grid Preconditioner - // =================================== + // =================================================== + // Multi Grid Preconditioner + // =================================== RCP M; - //Manual set up of the prolongation and restriction - MueLu::ParameterListInterpreter mueLuFactory( mueluParams ); - RCP > H = mueLuFactory.CreateHierarchy(); + // Manual set up of the prolongation and restriction + MueLu::ParameterListInterpreter mueLuFactory(mueluParams); + RCP> H = mueLuFactory.CreateHierarchy(); H->setVerbLevel(Teuchos::VERB_HIGH); - + H->GetLevel(0)->Set("A", xpetraA); H->GetLevel(0)->Set("Coordinates", coordinates); // Multigrid setup phase - mueLuFactory.SetupHierarchy(*H); + mueLuFactory.SetupHierarchy(*H); RCP L = H->GetLevel(1); - RCP> prolong, restr; + RCP> prolong, restr; if (L->IsAvailable("P")) - prolong = L->template Get< RCP> >("P"); + prolong = L->template Get>>("P"); if (L->IsAvailable("R")) - restr = L->template Get< RCP> >("R"); + restr = L->template Get>>("R"); RCP tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong); - RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); + RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); - #include - RCP PbarSetUp = Teuchos::TimeMonitor::getNewCounter("Pbar: SetUp"); +#include + RCP PbarSetUp = Teuchos::TimeMonitor::getNewCounter("Pbar: SetUp"); PbarSetUp->start(); RCP mueluPbar; - + // We have to transform P into a condensed multivector - RCP identity_shrunk = rcp( new 
multivector_type(tpetra_prolong->getDomainMap(), std::pow(3, coordinates->getNumVectors()) ) ); + RCP identity_shrunk = rcp(new multivector_type(tpetra_prolong->getDomainMap(), std::pow(3, coordinates->getNumVectors()))); Teuchos::ArrayView myIdentityGlobalElements = tpetra_prolong->getDomainMap()->getLocalElementList(); typedef typename Teuchos::ArrayView::const_iterator iter_type; - for(int trial = 1; trial<=number_runs; ++trial) - { - if(1 == coordinates->getNumVectors()) - { - - for(int j = 0; j<3; ++j) - { - int color = j; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - int local_color = (i_local)%3; - - if( local_color == color ) - localMV[aux] = 1.0; - - } - } - } - else if(2 == coordinates->getNumVectors()) - { - - for(int j = 0; j<9; ++j) - { - int color = j; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - const local_ordinal_type local_color = coloring2D( i_local+1, std::sqrt( tpetra_prolong->getGlobalNumCols() ) ); - - if( local_color == color ) - localMV[aux] = 1.0; - - } - } - } - else if(3 == coordinates->getNumVectors()) - { - //const local_ordinal_type local_color = coloring3D( mypid, std::cbrt( tpetra_prolong->getGlobalNumCols() ), std::cbrt( tpetra_prolong->getGlobalNumCols() ) ); - - for(int j = 0; j<27; ++j) - { - int color = j; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type 
i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - const local_ordinal_type local_color = coloring3D( i_local+1, std::cbrt( tpetra_prolong->getGlobalNumCols() ), std::cbrt( tpetra_prolong->getGlobalNumCols() ) ); - - if( local_color == color ) - localMV[aux] = 1.0; - } - } - } - - - RCP P_shrunk = rcp( new multivector_type( tpetra_prolong->getRangeMap(), std::pow(3, coordinates->getNumVectors()) ) ); - tpetra_prolong->apply(*identity_shrunk, *P_shrunk); - RCP AP_shrunk = rcp( new multivector_type( A->getRangeMap(), std::pow(3, coordinates->getNumVectors()) ) ); - A->apply(*P_shrunk, *AP_shrunk); - - //======================================================================================================== - - // CREATION OF BAP - - RCP BAP_shrunk = rcp(new multivector_type(B_DD->getRangeMap(),AP_shrunk->getNumVectors())); - B_DD->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits< scalar_type >::one(), Teuchos::ScalarTraits< scalar_type >::zero()); - - //Columns Map for BAP - std::vector indBAPcolMap; - if(1 == coordinates->getNumVectors()) - neighbours1D( tpetra_prolong, indBAPcolMap, comm ); - else if(2 == coordinates->getNumVectors()) - neighbours2D( tpetra_prolong, indBAPcolMap, comm, std::sqrt(tpetra_prolong->getGlobalNumCols()) ); - else if(3 == coordinates->getNumVectors()) - neighbours3D( tpetra_prolong, indBAPcolMap, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols()) ); - - Teuchos::ArrayView elementListBAP (indBAPcolMap); - Teuchos::RCP< Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > BAPcolMap = rcp( new Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type >( static_cast(tpetra_prolong->getGlobalNumCols()), elementListBAP, indexBase, comm ) ); - - RCP BAP = rcp( new crs_matrix_type( tpetra_prolong->getRowMap(), BAPcolMap, tpetra_prolong->getGlobalNumCols() ) ); - Teuchos::ArrayView myLocalElements = 
BAP->getRowMap()->getLocalElementList(); - - - if(1 == coordinates->getNumVectors()) - BAP1D(BAP, tpetra_prolong, BAP_shrunk, comm); - else if(2 == coordinates->getNumVectors()) - BAP2D( BAP, tpetra_prolong, BAP_shrunk, comm, std::sqrt(tpetra_prolong->getGlobalNumCols()) ); - else if(3 == coordinates->getNumVectors()) - BAP3D( BAP, tpetra_prolong, BAP_shrunk, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols()) ); - - BAP->fillComplete( tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap() ); - -//============================================================================================================= - RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, *tpetra_prolong, -1.0, false, *BAP); - mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); + for (int trial = 1; trial <= number_runs; ++trial) { + if (1 == coordinates->getNumVectors()) { + for (int j = 0; j < 3; ++j) { + int color = j; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + int local_color = (i_local) % 3; + + if (local_color == color) + localMV[aux] = 1.0; + } + } + } else if (2 == coordinates->getNumVectors()) { + for (int j = 0; j < 9; ++j) { + int color = j; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + const local_ordinal_type local_color = coloring2D(i_local + 1, std::sqrt(tpetra_prolong->getGlobalNumCols())); + + if (local_color == color) + localMV[aux] = 1.0; + } + } + } else if (3 == coordinates->getNumVectors()) { + // const 
local_ordinal_type local_color = coloring3D( mypid, std::cbrt( tpetra_prolong->getGlobalNumCols() ), std::cbrt( tpetra_prolong->getGlobalNumCols() ) ); + + for (int j = 0; j < 27; ++j) { + int color = j; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + const local_ordinal_type local_color = coloring3D(i_local + 1, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols())); + + if (local_color == color) + localMV[aux] = 1.0; + } + } + } + + RCP P_shrunk = rcp(new multivector_type(tpetra_prolong->getRangeMap(), std::pow(3, coordinates->getNumVectors()))); + tpetra_prolong->apply(*identity_shrunk, *P_shrunk); + RCP AP_shrunk = rcp(new multivector_type(A->getRangeMap(), std::pow(3, coordinates->getNumVectors()))); + A->apply(*P_shrunk, *AP_shrunk); + + //======================================================================================================== + + // CREATION OF BAP + + RCP BAP_shrunk = rcp(new multivector_type(B_DD->getRangeMap(), AP_shrunk->getNumVectors())); + B_DD->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits::one(), Teuchos::ScalarTraits::zero()); + + // Columns Map for BAP + std::vector indBAPcolMap; + if (1 == coordinates->getNumVectors()) + neighbours1D(tpetra_prolong, indBAPcolMap, comm); + else if (2 == coordinates->getNumVectors()) + neighbours2D(tpetra_prolong, indBAPcolMap, comm, std::sqrt(tpetra_prolong->getGlobalNumCols())); + else if (3 == coordinates->getNumVectors()) + neighbours3D(tpetra_prolong, indBAPcolMap, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols())); + + Teuchos::ArrayView elementListBAP(indBAPcolMap); + Teuchos::RCP> BAPcolMap = rcp(new 
Tpetra::Map(static_cast(tpetra_prolong->getGlobalNumCols()), elementListBAP, indexBase, comm)); + + RCP BAP = rcp(new crs_matrix_type(tpetra_prolong->getRowMap(), BAPcolMap, tpetra_prolong->getGlobalNumCols())); + Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); + + if (1 == coordinates->getNumVectors()) + BAP1D(BAP, tpetra_prolong, BAP_shrunk, comm); + else if (2 == coordinates->getNumVectors()) + BAP2D(BAP, tpetra_prolong, BAP_shrunk, comm, std::sqrt(tpetra_prolong->getGlobalNumCols())); + else if (3 == coordinates->getNumVectors()) + BAP3D(BAP, tpetra_prolong, BAP_shrunk, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols())); + + BAP->fillComplete(tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap()); + + //============================================================================================================= + RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, *tpetra_prolong, -1.0, false, *BAP); + mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); } PbarSetUp->stop(); @@ -423,28 +403,28 @@ int main(int argc, char *argv[]) { M = rcp(new muelu_tpetra_operator_type(H)); - //Intermediate print before zeroing out the global timers (needed to split set up timing and solve timing) + // Intermediate print before zeroing out the global timers (needed to split set up timing and solve timing) Teuchos::TimeMonitor::summarize(); Teuchos::TimeMonitor::zeroOutTimers(); //=============================================================================================================== // Linear Solver - RCP X_muelu = rcp(new multivector_type(map,1)); - RCP B = rcp(new multivector_type(map,1)); + RCP X_muelu = rcp(new multivector_type(map, 1)); + RCP B = rcp(new multivector_type(map, 1)); RCP Problem_muelu; - X_muelu->putScalar((scalar_type) 0.0); + X_muelu->putScalar((scalar_type)0.0); B->randomize(); Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); RCP belosList = rcp(new 
Teuchos::ParameterList()); - belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - //belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList->set("Verbosity", Belos::Errors); - belosList->set("Output Frequency", 1); - belosList->set("Output Style", Belos::Brief); + belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList->set("Verbosity", Belos::Errors); + belosList->set("Output Frequency", 1); + belosList->set("Output Style", Belos::Brief); belosList->set("Implicit Residual Scaling", "None"); RCP solver; if (krylovSolverType == "cg") @@ -456,64 +436,62 @@ int main(int argc, char *argv[]) { else throw std::invalid_argument("bad Krylov solver type"); -for(int trial = 1; trial<=number_runs; ++trial) -{ - - X_muelu->putScalar((scalar_type) 0.0); - B->randomize(); - - // - // Set up Krylov solver and iterate. 
- // - - Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); - Problem_muelu->setRightPrec(M); - Problem_muelu->setProblem(); - - solver->setProblem(Problem_muelu); - solver->solve(); - int numIterations_muelu = solver->getNumIters(); - - Teuchos::Array::magnitudeType> normVec_muelu(1); - multivector_type residual_muelu(B->getMap(),1); - A->apply(*X_muelu, residual_muelu); - residual_muelu.update(1.0, *B, -1.0); - residual_muelu.norm2(normVec_muelu); - if (mypid == 0) { - std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; - std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; - } -} + for (int trial = 1; trial <= number_runs; ++trial) { + X_muelu->putScalar((scalar_type)0.0); + B->randomize(); + + // + // Set up Krylov solver and iterate. + // + + Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); + Problem_muelu->setRightPrec(M); + Problem_muelu->setProblem(); + + solver->setProblem(Problem_muelu); + solver->solve(); + int numIterations_muelu = solver->getNumIters(); + + Teuchos::Array::magnitudeType> normVec_muelu(1); + multivector_type residual_muelu(B->getMap(), 1); + A->apply(*X_muelu, residual_muelu); + residual_muelu.update(1.0, *B, -1.0); + residual_muelu.norm2(normVec_muelu); + if (mypid == 0) { + std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; + std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; + } + } - Teuchos::TimeMonitor::summarize (); -/* - //Row Map for Timers vectors - std::vector indTimerMap; - - indTimerMap.emplace_back(mypid); - - Teuchos::ArrayView elementListTimer (indTimerMap); - Teuchos::RCP< Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > TimerMap = rcp( new Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type >( static_cast(comm->getSize()), elementListTimer, indexBase, comm ) ); - - RCP TimerRestr = rcp(new multivector_type(TimerMap,1)); - RCP TimerProlong = 
rcp(new multivector_type(TimerMap,1)); - RCP TimerFine = rcp(new multivector_type(TimerMap,1)); - RCP TimerCoarse = rcp(new multivector_type(TimerMap,1)); - - Teuchos::ArrayRCP localMV = TimerRestr->getDataNonConst(0); - const local_ordinal_type aux = TimerMap->getLocalElement (mypid); - localMV[aux] = (static_cast(Timers_Max[0])/CLOCKS_PER_SEC/number_runs); - localMV = TimerProlong->getDataNonConst(0); - localMV[aux] = (static_cast(Timers_Max[1])/CLOCKS_PER_SEC/number_runs); - localMV = TimerFine->getDataNonConst(0); - localMV[aux] = (static_cast(Timers_Max[2])/CLOCKS_PER_SEC/number_runs); - localMV = TimerCoarse->getDataNonConst(0); - localMV[aux] = (static_cast(Timers_Max[3])/CLOCKS_PER_SEC/number_runs); - - Tpetra::MatrixMarket::Writer::writeDenseFile("TimeRestr.mtx", TimerRestr); - Tpetra::MatrixMarket::Writer::writeDenseFile("TimeProlong.mtx", TimerProlong); - Tpetra::MatrixMarket::Writer::writeDenseFile("TimeFine.mtx", TimerFine); - Tpetra::MatrixMarket::Writer::writeDenseFile("TimeCoarse.mtx", TimerCoarse); -*/ + Teuchos::TimeMonitor::summarize(); + /* + //Row Map for Timers vectors + std::vector indTimerMap; + + indTimerMap.emplace_back(mypid); + + Teuchos::ArrayView elementListTimer (indTimerMap); + Teuchos::RCP< Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > TimerMap = rcp( new Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type >( static_cast(comm->getSize()), elementListTimer, indexBase, comm ) ); + + RCP TimerRestr = rcp(new multivector_type(TimerMap,1)); + RCP TimerProlong = rcp(new multivector_type(TimerMap,1)); + RCP TimerFine = rcp(new multivector_type(TimerMap,1)); + RCP TimerCoarse = rcp(new multivector_type(TimerMap,1)); + + Teuchos::ArrayRCP localMV = TimerRestr->getDataNonConst(0); + const local_ordinal_type aux = TimerMap->getLocalElement (mypid); + localMV[aux] = (static_cast(Timers_Max[0])/CLOCKS_PER_SEC/number_runs); + localMV = TimerProlong->getDataNonConst(0); + localMV[aux] = 
(static_cast(Timers_Max[1])/CLOCKS_PER_SEC/number_runs); + localMV = TimerFine->getDataNonConst(0); + localMV[aux] = (static_cast(Timers_Max[2])/CLOCKS_PER_SEC/number_runs); + localMV = TimerCoarse->getDataNonConst(0); + localMV[aux] = (static_cast(Timers_Max[3])/CLOCKS_PER_SEC/number_runs); + + Tpetra::MatrixMarket::Writer::writeDenseFile("TimeRestr.mtx", TimerRestr); + Tpetra::MatrixMarket::Writer::writeDenseFile("TimeProlong.mtx", TimerProlong); + Tpetra::MatrixMarket::Writer::writeDenseFile("TimeFine.mtx", TimerFine); + Tpetra::MatrixMarket::Writer::writeDenseFile("TimeCoarse.mtx", TimerCoarse); + */ return EXIT_SUCCESS; } diff --git a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp index 903af95b2380..40e12892d214 100644 --- a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp +++ b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp @@ -61,7 +61,6 @@ #include #include - // Belos provides Krylov solvers #include #include @@ -77,7 +76,7 @@ #include #include -//ADR subdirectory +// ADR subdirectory #include "CreateADRMatrix.hpp" #include #include @@ -87,24 +86,23 @@ //#include "coloring.hpp" #include "BAP.hpp" -int main(int argc, char *argv[]) { - +int main(int argc, char* argv[]) { // Define default types - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; // Convenient typedef's - typedef Tpetra::Operator operator_type; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::RowMatrix row_matrix_type; - typedef Tpetra::Vector vector_type; - typedef Tpetra::MultiVector multivector_type; - typedef Tpetra::Map driver_map_type; + typedef Tpetra::Operator operator_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef 
Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::Vector vector_type; + typedef Tpetra::MultiVector multivector_type; + typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; - typedef MueLu::Utilities MueLuUtilities; + typedef MueLu::Utilities MueLuUtilities; typedef Belos::LinearProblem linear_problem_type; typedef Belos::SolverManager belos_solver_manager_type; @@ -112,50 +110,50 @@ int main(int argc, char *argv[]) { typedef Belos::BlockGmresSolMgr belos_gmres_manager_type; typedef Belos::BiCGStabSolMgr belos_bicgstab_manager_type; - typedef Ifpack2::Preconditioner precond_type; + typedef Ifpack2::Preconditioner precond_type; - //MueLu_UseShortNames.hpp wants these typedefs. - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; + // MueLu_UseShortNames.hpp wants these typedefs. + typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; typedef global_ordinal_type GlobalOrdinal; - typedef node_type Node; -# include + typedef node_type Node; +#include - typedef Galeri::Xpetra::Problem GaleriXpetraProblem; - typedef ADR::Xpetra::Problem ADRXpetraProblem; + typedef Galeri::Xpetra::Problem GaleriXpetraProblem; + typedef ADR::Xpetra::Problem ADRXpetraProblem; - using Teuchos::RCP; // reference count pointers - using Teuchos::rcp; // reference count pointers + using Teuchos::RCP; // reference count pointers + using Teuchos::rcp; // reference count pointers // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int mypid = comm->getRank(); -/* - int subCommRank[3]={0,1,2}; - Teuchos::ArrayView arraySubCommRank(subCommRank, 3); - auto subComm = comm->createSubcommunicator(arraySubCommRank); -*/ + RCP> comm = Teuchos::DefaultComm::getComm(); + int mypid = comm->getRank(); + /* + int subCommRank[3]={0,1,2}; + Teuchos::ArrayView arraySubCommRank(subCommRank, 3); + auto subComm = 
comm->createSubcommunicator(arraySubCommRank); + */ Teuchos::CommandLineProcessor clp(false); - global_ordinal_type maxIts = 10000; - scalar_type tol = 1e-10; - std::string solverOptionsFile = "final_parser.xml"; - std::string krylovSolverType = "bicgstab"; + global_ordinal_type maxIts = 10000; + scalar_type tol = 1e-10; + std::string solverOptionsFile = "final_parser.xml"; + std::string krylovSolverType = "bicgstab"; - clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); - clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - clp.setOption("tol", &tol, "tolerance for Krylov solver"); - clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); + clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); + clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); + clp.setOption("tol", &tol, "tolerance for Krylov solver"); + clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Teuchos::ParameterList xmlParams; @@ -166,7 +164,7 @@ int main(int argc, char *argv[]) { problemParams = xmlParams.sublist(static_cast("Problem")); // Problem definition - std::string problem_type = problemParams.get(static_cast("problem type")); + std::string problem_type = problemParams.get(static_cast("problem type")); // Parameters @@ -183,14 +181,12 @@ int main(int argc, char *argv[]) { MueLu::DomainPartitioning domain; int keep_boundary = 0; - Scalar stretchx = (Scalar) Lx/nx; - Scalar stretchy = (Scalar) 
Ly/ny; - Scalar stretchz = (Scalar) Lz/nz; + Scalar stretchx = (Scalar)Lx / nx; + Scalar stretchy = (Scalar)Ly / ny; + Scalar stretchz = (Scalar)Lz / nz; - - ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary , stretchx, stretchy, stretchz); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - + ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary, stretchx, stretchy, stretchz); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra // // Construct the problem @@ -200,54 +196,51 @@ int main(int argc, char *argv[]) { RCP xpetraMap; std::vector ind; - //Creation of the map where processor 0 gets nothing at the fine level - if( comm->getSize()>1 ) - { - ind.reserve (static_cast( matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 ) + 1 )); - if( mypid !=0 && mypid!=comm->getSize()-1 ) - for(int i = 0; i <= ( static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 ))) - 1; ++i) - ind.emplace_back( (mypid-1) * static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 )) + i ); - - if( mypid==comm->getSize()-1 ) - for(int i = (mypid-1) * static_cast(matrixParameters.GetNumGlobalElements()/( comm->getSize()-1 )); i != matrixParameters.GetNumGlobalElements(); ++i) - ind.emplace_back( i ); + // Creation of the map where processor 0 gets nothing at the fine level + if (comm->getSize() > 1) { + ind.reserve(static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1) + 1)); + if (mypid != 0 && mypid != comm->getSize() - 1) + for (int i = 0; i <= (static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1))) - 1; ++i) + ind.emplace_back((mypid - 1) * static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1)) + i); - ind.shrink_to_fit(); + if (mypid == comm->getSize() - 1) + for (int i = (mypid - 1) * 
static_cast(matrixParameters.GetNumGlobalElements() / (comm->getSize() - 1)); i != matrixParameters.GetNumGlobalElements(); ++i) + ind.emplace_back(i); + ind.shrink_to_fit(); - Teuchos::ArrayView elementList (ind); - xpetraMap = MapFactory::Build(Xpetra::UseTpetra,matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); - } - else if( comm->getSize()==1 ) - xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + Teuchos::ArrayView elementList(ind); + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); + } else if (comm->getSize() == 1) + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); RCP coordinates; if (problem_type == "ADR1D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR2D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); else if (problem_type == "ADR3D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); - RCP xpetraA = Pr->BuildMatrix(); + RCP xpetraA = Pr->BuildMatrix(); - RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); + RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); - // 
=================================================== - // Domain Decomposition Preconditioner - // =================================== + // =================================================== + // Domain Decomposition Preconditioner + // =================================== - //Creation of the MueLu list for the DD preconditioner + // Creation of the MueLu list for the DD preconditioner RCP dd_list = rcp(new Teuchos::ParameterList()); dd_list->setName("MueLu"); dd_list->set("verbosity", "low"); dd_list->set("number of equations", 1); dd_list->set("max levels", 1); - dd_list->set("coarse: type", "SCHWARZ"); //FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL - + dd_list->set("coarse: type", "SCHWARZ"); // FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL + Teuchos::ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); dd_smooth_sublist.set("schwarz: overlap level", 0); dd_smooth_sublist.set("schwarz: combine mode", "Zero"); @@ -259,157 +252,150 @@ int main(int argc, char *argv[]) { coarse_subdomain_solver.set("fact: relative threshold", 1.); coarse_subdomain_solver.set("fact: relax value", 0.); - RCP B_DD = MueLu::CreateTpetraPreconditioner( (RCP)A, *dd_list ); + RCP B_DD = MueLu::CreateTpetraPreconditioner((RCP)A, *dd_list); - // =================================================== - // Multi Grid Preconditioner - // =================================== + // =================================================== + // Multi Grid Preconditioner + // =================================== RCP M; - //Manual set up of the prolongation and restriction - MueLu::ParameterListInterpreter mueLuFactory( mueluParams ); - RCP > H = mueLuFactory.CreateHierarchy(); + // Manual set up of the prolongation and restriction + MueLu::ParameterListInterpreter mueLuFactory(mueluParams); + RCP> H = mueLuFactory.CreateHierarchy(); H->setVerbLevel(Teuchos::VERB_HIGH); - + H->GetLevel(0)->Set("A", xpetraA); 
H->GetLevel(0)->Set("Coordinates", coordinates); // Multigrid setup phase - mueLuFactory.SetupHierarchy(*H); + mueLuFactory.SetupHierarchy(*H); RCP L = H->GetLevel(1); - RCP> prolong, restr; + RCP> prolong, restr; if (L->IsAvailable("P")) - prolong = L->template Get< RCP> >("P"); + prolong = L->template Get>>("P"); if (L->IsAvailable("R")) - restr = L->template Get< RCP> >("R"); + restr = L->template Get>>("R"); RCP tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong); - RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); + RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); - Tpetra::MatrixMarket::Writer::writeSparseFile("P.mtx", tpetra_prolong); //Auxiliary prints introduced to generate pictures + Tpetra::MatrixMarket::Writer::writeSparseFile("P.mtx", tpetra_prolong); // Auxiliary prints introduced to generate pictures RCP mueluPbar; // We have to transform P into a condensed multivector - RCP identity_shrunk = rcp( new multivector_type(tpetra_prolong->getDomainMap(), 3) ); + RCP identity_shrunk = rcp(new multivector_type(tpetra_prolong->getDomainMap(), 3)); Teuchos::ArrayView myIdentityGlobalElements = tpetra_prolong->getDomainMap()->getLocalElementList(); typedef typename Teuchos::ArrayView::const_iterator iter_type; -for(int trial = 1; trial<=number_runs; ++trial) -{ - for(int j = 0; jgetGlobalNumCols(); ++j) - { - int color = j%3; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - - if(i_local%3 == color) - localMV[aux] = 1.0; - - } + for (int trial = 1; trial <= number_runs; ++trial) { + for (int j = 0; j < tpetra_prolong->getGlobalNumCols(); ++j) { + int color = j % 3; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + for (iter_type it = 
myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + + if (i_local % 3 == color) + localMV[aux] = 1.0; + } } - RCP P_shrunk = rcp( new multivector_type(tpetra_prolong->getRangeMap(), 3) ); - tpetra_prolong->apply(*identity_shrunk, *P_shrunk); - RCP AP_shrunk = rcp( new multivector_type(A->getRangeMap(), 3) ); - A->apply(*P_shrunk, *AP_shrunk); + RCP P_shrunk = rcp(new multivector_type(tpetra_prolong->getRangeMap(), 3)); + tpetra_prolong->apply(*identity_shrunk, *P_shrunk); + RCP AP_shrunk = rcp(new multivector_type(A->getRangeMap(), 3)); + A->apply(*P_shrunk, *AP_shrunk); -//======================================================================================================== + //======================================================================================================== -// CREATION OF BAP + // CREATION OF BAP - RCP BAP_multivector = rcp(new multivector_type(B_DD->getRangeMap(),AP_shrunk->getNumVectors())); - RCP BAP_shrunk = rcp(new multivector_type(B_DD->getRangeMap(),AP_shrunk->getNumVectors())); - B_DD->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits< scalar_type >::one(), Teuchos::ScalarTraits< scalar_type >::zero()); + RCP BAP_multivector = rcp(new multivector_type(B_DD->getRangeMap(), AP_shrunk->getNumVectors())); + RCP BAP_shrunk = rcp(new multivector_type(B_DD->getRangeMap(), AP_shrunk->getNumVectors())); + B_DD->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits::one(), Teuchos::ScalarTraits::zero()); - //I just need this to generate the right colMap to populate BAP - RCP AP = rcp( new crs_matrix_type( tpetra_prolong->getRowMap(), tpetra_prolong->getColMap(), tpetra_prolong->getGlobalNumRows() ) ); - Tpetra::MatrixMatrix::Multiply(*A, false, *tpetra_prolong, false, *AP, true); + // I just need this to generate the right colMap to populate BAP 
+ RCP AP = rcp(new crs_matrix_type(tpetra_prolong->getRowMap(), tpetra_prolong->getColMap(), tpetra_prolong->getGlobalNumRows())); + Tpetra::MatrixMatrix::Multiply(*A, false, *tpetra_prolong, false, *AP, true); - //Columns Map for BAP - std::vector indBAPcolMap; - for(int i = 0; i(tpetra_prolong->getGlobalNumCols()); ++i) - indBAPcolMap.emplace_back(i); + // Columns Map for BAP + std::vector indBAPcolMap; + for (int i = 0; i < static_cast(tpetra_prolong->getGlobalNumCols()); ++i) + indBAPcolMap.emplace_back(i); - Teuchos::ArrayView elementListBAP (indBAPcolMap); - Teuchos::RCP< Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > BAPcolMap = rcp( new Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type >( static_cast(tpetra_prolong->getGlobalNumCols()), elementListBAP, indexBase, comm ) ); + Teuchos::ArrayView elementListBAP(indBAPcolMap); + Teuchos::RCP> BAPcolMap = rcp(new Tpetra::Map(static_cast(tpetra_prolong->getGlobalNumCols()), elementListBAP, indexBase, comm)); - RCP BAP = rcp( new crs_matrix_type( tpetra_prolong->getRowMap(), BAPcolMap, tpetra_prolong->getGlobalNumCols() ) ); - Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); + RCP BAP = rcp(new crs_matrix_type(tpetra_prolong->getRowMap(), BAPcolMap, tpetra_prolong->getGlobalNumCols())); + Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); - for(int color = 0; color<3; ++color) - { + for (int color = 0; color < 3; ++color) { Teuchos::ArrayRCP localBAP = BAP_shrunk->getData(color); - - for (iter_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) - { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = BAP->getRowMap()->getLocalElement (i_local); - - std::vector BAP_inds; - std::vector BAP_vals; - - local_ordinal_type aux2; - - if( (mypid-1)%3==color && (mypid-1)>= 0 && (mypid-1)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid-1); - else if( (mypid-2)%3==color && 
(mypid-2)>= 0 && (mypid-2)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid-2); - else if( (mypid)%3==color && (mypid)>= 0 && (mypid)getGlobalNumCols() ) - aux2 = BAP->getColMap()->getLocalElement (mypid); - - if(aux2>=0) - { - BAP_inds.emplace_back(aux2); - BAP_vals.emplace_back(localBAP[aux]); - BAP->insertLocalValues(aux, BAP_inds, BAP_vals); - } + + for (iter_type it = myLocalElements.begin(); it != myLocalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = BAP->getRowMap()->getLocalElement(i_local); + + std::vector BAP_inds; + std::vector BAP_vals; + + local_ordinal_type aux2; + + if ((mypid - 1) % 3 == color && (mypid - 1) >= 0 && (mypid - 1) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid - 1); + else if ((mypid - 2) % 3 == color && (mypid - 2) >= 0 && (mypid - 2) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid - 2); + else if ((mypid) % 3 == color && (mypid) >= 0 && (mypid) < tpetra_prolong->getGlobalNumCols()) + aux2 = BAP->getColMap()->getLocalElement(mypid); + + if (aux2 >= 0) { + BAP_inds.emplace_back(aux2); + BAP_vals.emplace_back(localBAP[aux]); + BAP->insertLocalValues(aux, BAP_inds, BAP_vals); + } } - - } - BAP->fillComplete( tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap() ); + } + BAP->fillComplete(tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap()); - //============================================================================================================= - RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, *tpetra_prolong, -1.0, false, *BAP); - mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); - Tpetra::MatrixMarket::Writer::writeSparseFile("Pbar.mtx", Pbar); //Auxiliary prints introduced to generate pictures -} + //============================================================================================================= + RCP Pbar = Tpetra::MatrixMatrix::add(1.0, 
false, *tpetra_prolong, -1.0, false, *BAP); + mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); + Tpetra::MatrixMarket::Writer::writeSparseFile("Pbar.mtx", Pbar); // Auxiliary prints introduced to generate pictures + } H->GetLevel(1)->Set("Pbar", mueluPbar); H->IsPreconditioner(true); M = rcp(new muelu_tpetra_operator_type(H)); - //Intermediate print before zeroing out the global timers (needed to split set up timing and solve timing) + // Intermediate print before zeroing out the global timers (needed to split set up timing and solve timing) Teuchos::TimeMonitor::summarize(); Teuchos::TimeMonitor::zeroOutTimers(); //====================================================================================================================== // Linear Solver - - RCP X_muelu = rcp(new multivector_type(map,1)); - RCP B = rcp(new multivector_type(map,1)); + + RCP X_muelu = rcp(new multivector_type(map, 1)); + RCP B = rcp(new multivector_type(map, 1)); RCP Problem_muelu; - X_muelu->putScalar((scalar_type) 0.0); + X_muelu->putScalar((scalar_type)0.0); B->randomize(); Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); RCP belosList = rcp(new Teuchos::ParameterList()); - belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - //belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList->set("Verbosity", Belos::Errors); - belosList->set("Output Frequency", 1); - belosList->set("Output Style", Belos::Brief); + belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList->set("Verbosity", Belos::Errors); + belosList->set("Output Frequency", 1); + belosList->set("Output Style", 
Belos::Brief); belosList->set("Implicit Residual Scaling", "None"); RCP solver; if (krylovSolverType == "cg") @@ -421,36 +407,34 @@ for(int trial = 1; trial<=number_runs; ++trial) else throw std::invalid_argument("bad Krylov solver type"); -for(int trial = 1; trial<=number_runs; ++trial) -{ - - X_muelu->putScalar((scalar_type) 0.0); - B->randomize(); - - // - // Set up Krylov solver and iterate. - // - - Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); - Problem_muelu->setRightPrec(M); - Problem_muelu->setProblem(); - - solver->setProblem(Problem_muelu); - solver->solve(); - int numIterations_muelu = solver->getNumIters(); - - Teuchos::Array::magnitudeType> normVec_muelu(1); - multivector_type residual_muelu(B->getMap(),1); - A->apply(*X_muelu, residual_muelu); - residual_muelu.update(1.0, *B, -1.0); - residual_muelu.norm2(normVec_muelu); - if (mypid == 0) { - std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; - std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; - } -} + for (int trial = 1; trial <= number_runs; ++trial) { + X_muelu->putScalar((scalar_type)0.0); + B->randomize(); + + // + // Set up Krylov solver and iterate. 
+ // + + Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); + Problem_muelu->setRightPrec(M); + Problem_muelu->setProblem(); + + solver->setProblem(Problem_muelu); + solver->solve(); + int numIterations_muelu = solver->getNumIters(); + + Teuchos::Array::magnitudeType> normVec_muelu(1); + multivector_type residual_muelu(B->getMap(), 1); + A->apply(*X_muelu, residual_muelu); + residual_muelu.update(1.0, *B, -1.0); + residual_muelu.norm2(normVec_muelu); + if (mypid == 0) { + std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; + std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; + } + } - Teuchos::TimeMonitor::summarize (); + Teuchos::TimeMonitor::summarize(); return EXIT_SUCCESS; } diff --git a/packages/muelu/research/max/AdditiveMG/Test.cpp b/packages/muelu/research/max/AdditiveMG/Test.cpp index ae74f6890fd7..4bbd26304e8b 100644 --- a/packages/muelu/research/max/AdditiveMG/Test.cpp +++ b/packages/muelu/research/max/AdditiveMG/Test.cpp @@ -5,13 +5,11 @@ #include #include - -//Galeri +// Galeri #include "Galeri_Maps.h" #include "Galeri_CrsMatrices.h" #include "Galeri_Utils.h" - // MueLu #include "MueLu.hpp" #include @@ -26,8 +24,6 @@ #include #include - - #ifdef HAVE_MPI #include "Epetra_MpiComm.h" #include "mpi.h" @@ -46,8 +42,7 @@ using namespace Galeri; // main driver // // =========== // -int main(int argc, char* argv[]) -{ +int main(int argc, char* argv[]) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); @@ -60,9 +55,9 @@ int main(int argc, char* argv[]) // Matrix * LHS = RHS // // with Matrix arising from a 5-point formula discretization. 
- - Epetra_Map* Map = 0; - Epetra_RowMatrix* Matrix = 0; + + Epetra_Map* Map = 0; + Epetra_RowMatrix* Matrix = 0; Teuchos::ParameterList GaleriList; // dimension of the problem is nx x ny @@ -72,12 +67,13 @@ int main(int argc, char* argv[]) GaleriList.set("mx", Comm.NumProc()); GaleriList.set("my", 1); - try - { - Map = CreateMap("Cartesian2D", Comm, GaleriList); + try { + Map = CreateMap("Cartesian2D", Comm, GaleriList); Matrix = CreateCrsMatrix("Laplace2D", Map, GaleriList); - Epetra_Vector ExactSolution(*Map); ExactSolution.Random(); - Epetra_Vector LHS(*Map); LHS.PutScalar(0.0); + Epetra_Vector ExactSolution(*Map); + ExactSolution.Random(); + Epetra_Vector LHS(*Map); + LHS.PutScalar(0.0); Epetra_Vector RHS(*Map); Matrix->Multiply(false, ExactSolution, RHS); @@ -88,7 +84,7 @@ int main(int argc, char* argv[]) // used, for example AztecOO, Amesos. IFPACK and ML can be used to define a // preconditioner for Matrix. Here we use a simple solver, based on // LAPACK, that is meant for simple testing only. - + Solve(Problem); // and we compute the norm of the true residual. 
@@ -99,9 +95,7 @@ int main(int argc, char* argv[]) delete Map; delete Matrix; - } - catch (Galeri::Exception& rhs) - { + } catch (Galeri::Exception& rhs) { if (Comm.MyPID() == 0) rhs.Print(); exit(EXIT_FAILURE); @@ -111,5 +105,5 @@ int main(int argc, char* argv[]) MPI_Finalize(); #endif - return(EXIT_SUCCESS); + return (EXIT_SUCCESS); } diff --git a/packages/muelu/research/max/AdditiveMG/coloring.hpp b/packages/muelu/research/max/AdditiveMG/coloring.hpp index 831e57754f14..2c8246d3c7c3 100644 --- a/packages/muelu/research/max/AdditiveMG/coloring.hpp +++ b/packages/muelu/research/max/AdditiveMG/coloring.hpp @@ -1,93 +1,87 @@ #include +int coloring2D(int mypid, int ndx) { + // INPUT: mypid is the MPI id of the processor + // INPUT: ndx is the nuber of domains along x-direction + // INPUT: ndy is the nuber of domains along y-direction + // OUTPUT: color label associated with the current MPI processor (=subdomain) -int coloring2D(int mypid, int ndx) -{ + // detect the row of the domain grid where the current subdomain is located + int grid_row = std::ceil(static_cast(mypid) / ndx); - //INPUT: mypid is the MPI id of the processor - //INPUT: ndx is the nuber of domains along x-direction - //INPUT: ndy is the nuber of domains along y-direction - //OUTPUT: color label associated with the current MPI processor (=subdomain) - - //detect the row of the domain grid where the current subdomain is located - int grid_row = std::ceil(static_cast(mypid)/ndx); - - //detect the y-coordinate in the domain grid associated with the current subdomain + // detect the y-coordinate in the domain grid associated with the current subdomain int ypos = grid_row % 3; int xpos; int color = -1; - //detect the x-coordinate in the domain grid associated with current subdomain - if( 0 != mypid%ndx ) - xpos = static_cast( mypid - std::floor(static_cast(mypid)/ndx)*ndx )%3; + // detect the x-coordinate in the domain grid associated with current subdomain + if (0 != mypid % ndx) + xpos = 
static_cast(mypid - std::floor(static_cast(mypid) / ndx) * ndx) % 3; else - xpos = (mypid - ((ypos - 1)*ndx))%3; - - //use x and y coordinates to determine the color of the current subdomain - if ( xpos>0 && ypos>0 ) - color = ( ypos-1 ) * 3 + xpos; - else if ( xpos>0 && ypos==0 ) - color = 6 + xpos; - else if ( xpos==0 && ypos>0 ) - color = ypos * 3; + xpos = (mypid - ((ypos - 1) * ndx)) % 3; + + // use x and y coordinates to determine the color of the current subdomain + if (xpos > 0 && ypos > 0) + color = (ypos - 1) * 3 + xpos; + else if (xpos > 0 && ypos == 0) + color = 6 + xpos; + else if (xpos == 0 && ypos > 0) + color = ypos * 3; else - color = 9; + color = 9; - //TEUCHOS_TEST_FOR_EXCEPT( color < 1 ); - return color-1; + // TEUCHOS_TEST_FOR_EXCEPT( color < 1 ); + return color - 1; } +int coloring3D(int mypid, int ndx, int ndy) { + // INPUT: mypid is the MPI id of the processor + // INPUT: ndx is the nuber of domains along x-direction + // INPUT: ndy is the nuber of domains along y-direction + // OUTPUT: color label associated with the current MPI processor (=subdomain) -int coloring3D(int mypid, int ndx, int ndy) -{ - - //INPUT: mypid is the MPI id of the processor - //INPUT: ndx is the nuber of domains along x-direction - //INPUT: ndy is the nuber of domains along y-direction - //OUTPUT: color label associated with the current MPI processor (=subdomain) - - //detect the plane of the domain grid where the current subdomain resides - int grid_plane = std::ceil( static_cast(mypid)/(ndx*ndy) ); + // detect the plane of the domain grid where the current subdomain resides + int grid_plane = std::ceil(static_cast(mypid) / (ndx * ndy)); - //On the given plane, find the local id of the current subdomain - int plane_id = mypid % ( ndx*ndy ); + // On the given plane, find the local id of the current subdomain + int plane_id = mypid % (ndx * ndy); - //detect the row on the current two-dimensional grid where the current subdomain is located - int plane_row = 
std::ceil(static_cast(plane_id)/ndx); + // detect the row on the current two-dimensional grid where the current subdomain is located + int plane_row = std::ceil(static_cast(plane_id) / ndx); - //detect the y-coordinate on the given plane associated with the current subdomain + // detect the y-coordinate on the given plane associated with the current subdomain int ypos = plane_row % 3; int xpos; int label_plane; int color = -1; - //detect the x-coordinate on the given plane associated with the current subdomain - if( 0 != plane_id%ndx ) - xpos = static_cast( mypid - std::floor(static_cast(plane_id)/ndx)*ndx ) % 3; + // detect the x-coordinate on the given plane associated with the current subdomain + if (0 != plane_id % ndx) + xpos = static_cast(mypid - std::floor(static_cast(plane_id) / ndx) * ndx) % 3; else - xpos = (plane_id - ((ypos - 1)*ndx))%3; - - //find a two-dimensional coloring to give to the current subdomain according to its position - //in the plane where it resides - if ( xpos>0 && ypos>0 ) - label_plane = ( ypos-1 ) * 3 + xpos; - else if ( xpos>0 && ypos==0 ) - label_plane = 6 + xpos; - else if ( xpos==0 && ypos>0 ) - label_plane = ypos * 3; + xpos = (plane_id - ((ypos - 1) * ndx)) % 3; + + // find a two-dimensional coloring to give to the current subdomain according to its position + // in the plane where it resides + if (xpos > 0 && ypos > 0) + label_plane = (ypos - 1) * 3 + xpos; + else if (xpos > 0 && ypos == 0) + label_plane = 6 + xpos; + else if (xpos == 0 && ypos > 0) + label_plane = ypos * 3; else - label_plane = 9; + label_plane = 9; - //use the two-dimensional coloring to determine the three-dimensional coloring - if ( grid_plane%3 ==1 ) + // use the two-dimensional coloring to determine the three-dimensional coloring + if (grid_plane % 3 == 1) color = label_plane; - else if ( grid_plane%3 == 2 ) + else if (grid_plane % 3 == 2) color = 9 + label_plane; - else if ( grid_plane%3 == 0 ) + else if (grid_plane % 3 == 0) color = 18 + label_plane; 
- //TEUCHOS_TEST_FOR_EXCEPT( color < 1 ); - return color-1; + // TEUCHOS_TEST_FOR_EXCEPT( color < 1 ); + return color - 1; } diff --git a/packages/muelu/research/max/AdditiveMG/neighbours.hpp b/packages/muelu/research/max/AdditiveMG/neighbours.hpp index 21b0180d34e6..154351934f85 100644 --- a/packages/muelu/research/max/AdditiveMG/neighbours.hpp +++ b/packages/muelu/research/max/AdditiveMG/neighbours.hpp @@ -3,195 +3,172 @@ #include "coloring.hpp" - -typedef Tpetra::CrsMatrix tpetra_matrix_type; -typedef Tpetra::MultiVector::magnitudeType,int,int,KokkosClassic::DefaultNode::DefaultNodeType> tpetra_multivector_type; -typedef typename Teuchos::ArrayView::const_iterator iterator_type; +typedef Tpetra::CrsMatrix tpetra_matrix_type; +typedef Tpetra::MultiVector::magnitudeType, int, int, KokkosClassic::DefaultNode::DefaultNodeType> tpetra_multivector_type; +typedef typename Teuchos::ArrayView::const_iterator iterator_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type2; -void neighbours1D(Teuchos::RCP tpetra_prolong, std::vector& neighbours, Teuchos::RCP< const Teuchos::Comm > comm) -{ - - //INPUT: tpetra_prolong is the tentative prolongator, here it is just used to get the Total number of aggregates given by the nubmer of columns - //INPUT: neighoubrs is a vector where the neoghbours of the current MPI processor (=subdomain) are stored - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - - neighbours.clear(); - - int mypid = comm->getRank(); +void neighbours1D(Teuchos::RCP tpetra_prolong, std::vector& neighbours, Teuchos::RCP > comm) { + // INPUT: tpetra_prolong is the tentative prolongator, here it is just used to get the Total number of aggregates given by the nubmer of columns + // INPUT: neighoubrs is a vector where the neoghbours of the current MPI processor (=subdomain) are stored + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) - for(int color = 0; color<3; ++color) - { - - if( (mypid-1)%3==color && (mypid-1)>= 0 && 
(mypid-1)getGlobalNumCols() ) - neighbours.emplace_back(mypid-1); - else if( (mypid-2)%3==color && (mypid-2)>= 0 && (mypid-2)getGlobalNumCols() ) - neighbours.emplace_back(mypid-2); - else if( (mypid)%3==color && (mypid)>= 0 && (mypid)getGlobalNumCols() ) - neighbours.emplace_back(mypid); + neighbours.clear(); - } + int mypid = comm->getRank(); - neighbours.shrink_to_fit(); + for (int color = 0; color < 3; ++color) { + if ((mypid - 1) % 3 == color && (mypid - 1) >= 0 && (mypid - 1) < tpetra_prolong->getGlobalNumCols()) + neighbours.emplace_back(mypid - 1); + else if ((mypid - 2) % 3 == color && (mypid - 2) >= 0 && (mypid - 2) < tpetra_prolong->getGlobalNumCols()) + neighbours.emplace_back(mypid - 2); + else if ((mypid) % 3 == color && (mypid) >= 0 && (mypid) < tpetra_prolong->getGlobalNumCols()) + neighbours.emplace_back(mypid); + } + neighbours.shrink_to_fit(); } +void neighbours2D(Teuchos::RCP tpetra_prolong, std::vector& neighbours, Teuchos::RCP > comm, int ndx) { + // INPUT: tpetra_prolong is the tentative prolongator, here it is just used to get the Total number of aggregates given by the nubmer of columns + // INPUT: neighoubrs is a vector where the neoghbours of the current MPI processor (=subdomain) are stored + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) + // INPUT: ndx is the number of domains along the x-direction -void neighbours2D(Teuchos::RCP tpetra_prolong, std::vector& neighbours, Teuchos::RCP< const Teuchos::Comm > comm, int ndx) -{ - - //INPUT: tpetra_prolong is the tentative prolongator, here it is just used to get the Total number of aggregates given by the nubmer of columns - //INPUT: neighoubrs is a vector where the neoghbours of the current MPI processor (=subdomain) are stored - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - //INPUT: ndx is the number of domains along the x-direction - - neighbours.clear(); - int mypid = comm->getRank(); - int brick_id = mypid; - int shifted_id = brick_id-1; - - if(mypid>0) - { - for(int color = 
0; color<9; ++color) - { - int neighbour = -1; - - //The following if statements control the neighbours of a subdomain in a 2D brick partitioned mesh - //Each subdomains is incorporated in a 3x3 square which is sliced into 3 stripes - //The neighbours of a subdomain are checked plane by plane: in total there are three planes to span - // In case the subdomains sit on a boundary, there are missing neighbours for a specific color - // (this is what the last two conditions of each if statement take care of) - if( coloring2D( brick_id, ndx )==color && (shifted_id)>= 0 && (shifted_id)getGlobalNumCols() ) - neighbour = (shifted_id); + neighbours.clear(); + int mypid = comm->getRank(); + int brick_id = mypid; + int shifted_id = brick_id - 1; - else if( coloring2D( brick_id-1, ndx )==color && (shifted_id-1)>= 0 && (shifted_id-1)getGlobalNumCols() ) - neighbour = (shifted_id-1); + if (mypid > 0) { + for (int color = 0; color < 9; ++color) { + int neighbour = -1; - else if( coloring2D( brick_id+1, ndx )==color && (shifted_id+1)>= 0 && (shifted_id+1)getGlobalNumCols() ) - neighbour = (shifted_id+1); + // The following if statements control the neighbours of a subdomain in a 2D brick partitioned mesh + // Each subdomains is incorporated in a 3x3 square which is sliced into 3 stripes + // The neighbours of a subdomain are checked plane by plane: in total there are three planes to span + // In case the subdomains sit on a boundary, there are missing neighbours for a specific color + // (this is what the last two conditions of each if statement take care of) + if (coloring2D(brick_id, ndx) == color && (shifted_id) >= 0 && (shifted_id) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id); - else if( coloring2D( brick_id-ndx, ndx )==color && (shifted_id-ndx)>= 0 && (shifted_id-ndx)getGlobalNumCols() ) - neighbour = (shifted_id-ndx); + else if (coloring2D(brick_id - 1, ndx) == color && (shifted_id - 1) >= 0 && (shifted_id - 1) < tpetra_prolong->getGlobalNumCols()) + 
neighbour = (shifted_id - 1); - else if( coloring2D( brick_id-ndx-1, ndx )==color && (shifted_id-ndx-1)>= 0 && (shifted_id-ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1); + else if (coloring2D(brick_id + 1, ndx) == color && (shifted_id + 1) >= 0 && (shifted_id + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1); - else if( coloring2D( brick_id-ndx+1, ndx )==color && (shifted_id-ndx+1)>= 0 && (shifted_id-ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1); + else if (coloring2D(brick_id - ndx, ndx) == color && (shifted_id - ndx) >= 0 && (shifted_id - ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx); - else if( coloring2D( brick_id+ndx, ndx )==color && (shifted_id+ndx)>= 0 && (shifted_id+ndx)getGlobalNumCols() ) - neighbour = (shifted_id+ndx); + else if (coloring2D(brick_id - ndx - 1, ndx) == color && (shifted_id - ndx - 1) >= 0 && (shifted_id - ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1); - else if( coloring2D( brick_id+ndx-1, ndx )==color && (shifted_id+ndx-1)>= 0 && (shifted_id+ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1); + else if (coloring2D(brick_id - ndx + 1, ndx) == color && (shifted_id - ndx + 1) >= 0 && (shifted_id - ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1); - else if( coloring2D( brick_id+ndx+1, ndx )==color && (shifted_id+ndx+1)>= 0 && (shifted_id+ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1); + else if (coloring2D(brick_id + ndx, ndx) == color && (shifted_id + ndx) >= 0 && (shifted_id + ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx); - if(neighbour>=0) - neighbours.emplace_back(neighbour); + else if (coloring2D(brick_id + ndx - 1, ndx) == color && (shifted_id + ndx - 1) >= 0 && (shifted_id + ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1); - } - } + else if (coloring2D(brick_id + ndx + 1, ndx) == color && 
(shifted_id + ndx + 1) >= 0 && (shifted_id + ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1); - neighbours.shrink_to_fit(); + if (neighbour >= 0) + neighbours.emplace_back(neighbour); + } + } + neighbours.shrink_to_fit(); } - -void neighbours3D(Teuchos::RCP tpetra_prolong, std::vector& neighbours, Teuchos::RCP< const Teuchos::Comm > comm, int ndx, int ndy) -{ - - //INPUT: tpetra_prolong is the tentative prolongator, here it is just used to get the Total number of aggregates given by the nubmer of columns - //INPUT: neighoubrs is a vector where the neoghbours of the current MPI processor (=subdomain) are stored - //INPUT: comm = MPI communicator (MPI_COMM_WORLD) - //INPUT: ndx is the number of domains along the x-direction - //INPUT: ndy is the nubmer of domains along the y-direction +void neighbours3D(Teuchos::RCP tpetra_prolong, std::vector& neighbours, Teuchos::RCP > comm, int ndx, int ndy) { + // INPUT: tpetra_prolong is the tentative prolongator, here it is just used to get the Total number of aggregates given by the nubmer of columns + // INPUT: neighoubrs is a vector where the neoghbours of the current MPI processor (=subdomain) are stored + // INPUT: comm = MPI communicator (MPI_COMM_WORLD) + // INPUT: ndx is the number of domains along the x-direction + // INPUT: ndy is the nubmer of domains along the y-direction neighbours.clear(); - int mypid = comm->getRank(); - int brick_id = mypid; + int mypid = comm->getRank(); + int brick_id = mypid; int shifted_id = brick_id - 1; - if(mypid>0) - { - for(int color = 0; color<27; ++color) - { - int neighbour = -1; - - //The following if statements control the neighbours of a subdomain in a 3D brick partitioned mesh - //Each subdomains is incorporated in a 3x3x3 cube which is sliced into 3 squares living on three different planes - //The neighbours of a subdomain are checked plane by plane: in total there are three planes to span - // In case the subdomains sit on a boundary, there 
are missing neighbours for a specific color - // (this is what the last two conditions of each if statement take care of) - //Identification of neighbours that are on the current plane - if( coloring3D( brick_id, ndx, ndy )==color && (shifted_id)>= 0 && (shifted_id)getGlobalNumCols() ) - neighbour = (shifted_id); - else if( coloring3D( brick_id-1, ndx, ndy )==color && (shifted_id-1)>= 0 && (shifted_id-1)getGlobalNumCols() ) - neighbour = (shifted_id-1); - else if( coloring3D( brick_id+1, ndx, ndy )==color && (shifted_id+1)>= 0 && (shifted_id+1)getGlobalNumCols() ) - neighbour = (shifted_id+1); - else if( coloring3D( brick_id-ndx, ndx, ndy )==color && (shifted_id-ndx)>= 0 && (shifted_id-ndx)getGlobalNumCols() ) - neighbour = (shifted_id-ndx); - else if( coloring3D( brick_id-ndx-1, ndx, ndy )==color && (shifted_id-ndx-1)>= 0 && (shifted_id-ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1); - else if( coloring3D( brick_id-ndx+1, ndx, ndy )==color && (shifted_id-ndx+1)>= 0 && (shifted_id-ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1); - else if( coloring3D( brick_id+ndx, ndx, ndy )==color && (shifted_id+ndx)>= 0 && (shifted_id+ndx)getGlobalNumCols() ) - neighbour = (shifted_id+ndx); - else if( coloring3D( brick_id+ndx-1, ndx, ndy )==color && (shifted_id+ndx-1)>= 0 && (shifted_id+ndx-1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1); - else if( coloring3D( brick_id+ndx+1, ndx, ndy )==color && (shifted_id+ndx+1)>= 0 && (shifted_id+ndx+1)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1); - - //Identification of the neighbours that are on the plane below - else if( coloring3D( brick_id-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx*ndy)>= 0 && (shifted_id-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx*ndy); - else if( coloring3D( brick_id-1-ndx*ndy, ndx, ndy )==color && (shifted_id-1-ndx*ndy)>= 0 && (shifted_id-1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-1-ndx*ndy); - else if( coloring3D( brick_id+1-ndx*ndy, ndx, ndy 
)==color && (shifted_id+1-ndx*ndy)>= 0 && (shifted_id+1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+1-ndx*ndy); - else if( coloring3D( brick_id-ndx-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx-ndx*ndy)>= 0 && (shifted_id-ndx-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-ndx*ndy); - else if( coloring3D( brick_id-ndx-1-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx-1-ndx*ndy)>= 0 && (shifted_id-ndx-1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1-ndx*ndy); - else if( coloring3D( brick_id-ndx+1-ndx*ndy, ndx, ndy )==color && (shifted_id-ndx+1-ndx*ndy)>= 0 && (shifted_id-ndx+1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1-ndx*ndy); - else if( coloring3D( brick_id+ndx-ndx*ndy, ndx, ndy )==color && (shifted_id+ndx-ndx*ndy)>= 0 && (shifted_id+ndx-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-ndx*ndy); - else if( coloring3D( brick_id+ndx-1-ndx*ndy, ndx, ndy )==color && (shifted_id+ndx-1-ndx*ndy)>= 0 && (shifted_id+ndx-1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1-ndx*ndy); - else if( coloring3D( brick_id+ndx+1-ndx*ndy, ndx, ndy )==color && (shifted_id+ndx+1-ndx*ndy)>= 0 && (shifted_id+ndx+1-ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1-ndx*ndy); - - //Identification of the neighbours that are on the plane above - else if( coloring3D( brick_id+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx*ndy)>= 0 && (shifted_id+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx*ndy); - else if( coloring3D( brick_id-1+ndx*ndy, ndx, ndy )==color && (shifted_id-1+ndx*ndy)>= 0 && (shifted_id-1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-1+ndx*ndy); - else if( coloring3D( brick_id+1+ndx*ndy, ndx, ndy )==color && (shifted_id+1+ndx*ndy)>= 0 && (shifted_id+1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+1+ndx*ndy); - else if( coloring3D( brick_id-ndx+ndx*ndy, ndx, ndy )==color && (shifted_id-ndx+ndx*ndy)>= 0 && (shifted_id-ndx+ndx*ndy)getGlobalNumCols() ) - neighbour = 
(shifted_id-ndx+ndx*ndy); - else if( coloring3D( brick_id-ndx-1+ndx*ndy, ndx, ndy )==color && (shifted_id-ndx-1+ndx*ndy)>= 0 && (shifted_id-ndx-1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx-1+ndx*ndy); - else if( coloring3D( brick_id-ndx+1+ndx*ndy, ndx, ndy )==color && (shifted_id-ndx+1+ndx*ndy)>= 0 && (shifted_id-ndx+1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id-ndx+1+ndx*ndy); - else if( coloring3D( brick_id+ndx+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx+ndx*ndy)>= 0 && (shifted_id+ndx+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+ndx*ndy); - else if( coloring3D( brick_id+ndx-1+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx-1+ndx*ndy)>= 0 && (shifted_id+ndx-1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx-1+ndx*ndy); - else if( coloring3D( brick_id+ndx+1+ndx*ndy, ndx, ndy )==color && (shifted_id+ndx+1+ndx*ndy)>= 0 && (shifted_id+ndx+1+ndx*ndy)getGlobalNumCols() ) - neighbour = (shifted_id+ndx+1+ndx*ndy); - - if(neighbour>=0) - neighbours.emplace_back(neighbour); - - } - } - - neighbours.shrink_to_fit(); - + if (mypid > 0) { + for (int color = 0; color < 27; ++color) { + int neighbour = -1; + + // The following if statements control the neighbours of a subdomain in a 3D brick partitioned mesh + // Each subdomains is incorporated in a 3x3x3 cube which is sliced into 3 squares living on three different planes + // The neighbours of a subdomain are checked plane by plane: in total there are three planes to span + // In case the subdomains sit on a boundary, there are missing neighbours for a specific color + // (this is what the last two conditions of each if statement take care of) + // Identification of neighbours that are on the current plane + if (coloring3D(brick_id, ndx, ndy) == color && (shifted_id) >= 0 && (shifted_id) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id); + else if (coloring3D(brick_id - 1, ndx, ndy) == color && (shifted_id - 1) >= 0 && (shifted_id - 1) < 
tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1); + else if (coloring3D(brick_id + 1, ndx, ndy) == color && (shifted_id + 1) >= 0 && (shifted_id + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1); + else if (coloring3D(brick_id - ndx, ndx, ndy) == color && (shifted_id - ndx) >= 0 && (shifted_id - ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx); + else if (coloring3D(brick_id - ndx - 1, ndx, ndy) == color && (shifted_id - ndx - 1) >= 0 && (shifted_id - ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1); + else if (coloring3D(brick_id - ndx + 1, ndx, ndy) == color && (shifted_id - ndx + 1) >= 0 && (shifted_id - ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1); + else if (coloring3D(brick_id + ndx, ndx, ndy) == color && (shifted_id + ndx) >= 0 && (shifted_id + ndx) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx); + else if (coloring3D(brick_id + ndx - 1, ndx, ndy) == color && (shifted_id + ndx - 1) >= 0 && (shifted_id + ndx - 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1); + else if (coloring3D(brick_id + ndx + 1, ndx, ndy) == color && (shifted_id + ndx + 1) >= 0 && (shifted_id + ndx + 1) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1); + + // Identification of the neighbours that are on the plane below + else if (coloring3D(brick_id - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx * ndy) >= 0 && (shifted_id - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx * ndy); + else if (coloring3D(brick_id - 1 - ndx * ndy, ndx, ndy) == color && (shifted_id - 1 - ndx * ndy) >= 0 && (shifted_id - 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1 - ndx * ndy); + else if (coloring3D(brick_id + 1 - ndx * ndy, ndx, ndy) == color && (shifted_id + 1 - ndx * ndy) >= 0 && (shifted_id + 1 - ndx * ndy) < 
tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1 - ndx * ndy); + else if (coloring3D(brick_id - ndx - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx - ndx * ndy) >= 0 && (shifted_id - ndx - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - ndx * ndy); + else if (coloring3D(brick_id - ndx - 1 - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx - 1 - ndx * ndy) >= 0 && (shifted_id - ndx - 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1 - ndx * ndy); + else if (coloring3D(brick_id - ndx + 1 - ndx * ndy, ndx, ndy) == color && (shifted_id - ndx + 1 - ndx * ndy) >= 0 && (shifted_id - ndx + 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1 - ndx * ndy); + else if (coloring3D(brick_id + ndx - ndx * ndy, ndx, ndy) == color && (shifted_id + ndx - ndx * ndy) >= 0 && (shifted_id + ndx - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - ndx * ndy); + else if (coloring3D(brick_id + ndx - 1 - ndx * ndy, ndx, ndy) == color && (shifted_id + ndx - 1 - ndx * ndy) >= 0 && (shifted_id + ndx - 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1 - ndx * ndy); + else if (coloring3D(brick_id + ndx + 1 - ndx * ndy, ndx, ndy) == color && (shifted_id + ndx + 1 - ndx * ndy) >= 0 && (shifted_id + ndx + 1 - ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1 - ndx * ndy); + + // Identification of the neighbours that are on the plane above + else if (coloring3D(brick_id + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx * ndy) >= 0 && (shifted_id + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx * ndy); + else if (coloring3D(brick_id - 1 + ndx * ndy, ndx, ndy) == color && (shifted_id - 1 + ndx * ndy) >= 0 && (shifted_id - 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - 1 + ndx * ndy); + else if 
(coloring3D(brick_id + 1 + ndx * ndy, ndx, ndy) == color && (shifted_id + 1 + ndx * ndy) >= 0 && (shifted_id + 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + 1 + ndx * ndy); + else if (coloring3D(brick_id - ndx + ndx * ndy, ndx, ndy) == color && (shifted_id - ndx + ndx * ndy) >= 0 && (shifted_id - ndx + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + ndx * ndy); + else if (coloring3D(brick_id - ndx - 1 + ndx * ndy, ndx, ndy) == color && (shifted_id - ndx - 1 + ndx * ndy) >= 0 && (shifted_id - ndx - 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx - 1 + ndx * ndy); + else if (coloring3D(brick_id - ndx + 1 + ndx * ndy, ndx, ndy) == color && (shifted_id - ndx + 1 + ndx * ndy) >= 0 && (shifted_id - ndx + 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id - ndx + 1 + ndx * ndy); + else if (coloring3D(brick_id + ndx + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx + ndx * ndy) >= 0 && (shifted_id + ndx + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + ndx * ndy); + else if (coloring3D(brick_id + ndx - 1 + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx - 1 + ndx * ndy) >= 0 && (shifted_id + ndx - 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx - 1 + ndx * ndy); + else if (coloring3D(brick_id + ndx + 1 + ndx * ndy, ndx, ndy) == color && (shifted_id + ndx + 1 + ndx * ndy) >= 0 && (shifted_id + ndx + 1 + ndx * ndy) < tpetra_prolong->getGlobalNumCols()) + neighbour = (shifted_id + ndx + 1 + ndx * ndy); + + if (neighbour >= 0) + neighbours.emplace_back(neighbour); + } + } + + neighbours.shrink_to_fit(); } - - diff --git a/packages/muelu/research/max/AdditiveMG/tentative.cpp b/packages/muelu/research/max/AdditiveMG/tentative.cpp index b7834756e02a..a5e759379a97 100644 --- a/packages/muelu/research/max/AdditiveMG/tentative.cpp +++ 
b/packages/muelu/research/max/AdditiveMG/tentative.cpp @@ -77,7 +77,7 @@ extern clock_t Timers_Max[4]; #include #include -//ADR subdirectory +// ADR subdirectory #include "CreateADRMatrix.hpp" #include #include @@ -88,24 +88,23 @@ extern clock_t Timers_Max[4]; #include "BAP.hpp" #include "CreateBrickMap.hpp" -int main(int argc, char *argv[]) { - +int main(int argc, char* argv[]) { // Define default types - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType node_type; // Convenient typedef's - typedef Tpetra::Operator operator_type; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::RowMatrix row_matrix_type; - typedef Tpetra::Vector vector_type; - typedef Tpetra::MultiVector multivector_type; - typedef Tpetra::Map driver_map_type; + typedef Tpetra::Operator operator_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::Vector vector_type; + typedef Tpetra::MultiVector multivector_type; + typedef Tpetra::Map driver_map_type; typedef MueLu::TpetraOperator muelu_tpetra_operator_type; - typedef MueLu::Utilities MueLuUtilities; + typedef MueLu::Utilities MueLuUtilities; typedef Belos::LinearProblem linear_problem_type; typedef Belos::SolverManager belos_solver_manager_type; @@ -113,50 +112,50 @@ int main(int argc, char *argv[]) { typedef Belos::BlockGmresSolMgr belos_gmres_manager_type; typedef Belos::BiCGStabSolMgr belos_bicgstab_manager_type; - typedef Ifpack2::Preconditioner precond_type; + typedef Ifpack2::Preconditioner precond_type; - //MueLu_UseShortNames.hpp wants these typedefs. - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; + // MueLu_UseShortNames.hpp wants these typedefs. 
+ typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; typedef global_ordinal_type GlobalOrdinal; - typedef node_type Node; -# include + typedef node_type Node; +#include - typedef Galeri::Xpetra::Problem GaleriXpetraProblem; - typedef ADR::Xpetra::Problem ADRXpetraProblem; + typedef Galeri::Xpetra::Problem GaleriXpetraProblem; + typedef ADR::Xpetra::Problem ADRXpetraProblem; - using Teuchos::RCP; // reference count pointers - using Teuchos::rcp; // reference count pointers + using Teuchos::RCP; // reference count pointers + using Teuchos::rcp; // reference count pointers // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - int mypid = comm->getRank(); -/* - int subCommRank[3]={0,1,2}; - Teuchos::ArrayView arraySubCommRank(subCommRank, 3); - auto subComm = comm->createSubcommunicator(arraySubCommRank); -*/ + RCP> comm = Teuchos::DefaultComm::getComm(); + int mypid = comm->getRank(); + /* + int subCommRank[3]={0,1,2}; + Teuchos::ArrayView arraySubCommRank(subCommRank, 3); + auto subComm = comm->createSubcommunicator(arraySubCommRank); + */ Teuchos::CommandLineProcessor clp(false); - global_ordinal_type maxIts = 10000; - scalar_type tol = 1e-10; - std::string solverOptionsFile = "final_parser.xml"; - std::string krylovSolverType = "bicgstab"; + global_ordinal_type maxIts = 10000; + scalar_type tol = 1e-10; + std::string solverOptionsFile = "final_parser.xml"; + std::string krylovSolverType = "bicgstab"; - clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); - clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - clp.setOption("tol", &tol, "tolerance for Krylov solver"); - clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); + clp.setOption("xmlFile", &solverOptionsFile, "XML file containing MueLu solver parameters"); + clp.setOption("maxits", &maxIts, 
"maximum number of Krylov iterations"); + clp.setOption("tol", &tol, "tolerance for Krylov solver"); + clp.setOption("krylovType", &krylovSolverType, "cg or gmres solver"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Teuchos::ParameterList xmlParams; @@ -167,7 +166,7 @@ int main(int argc, char *argv[]) { problemParams = xmlParams.sublist(static_cast("Problem")); // Problem definition - std::string problem_type = problemParams.get(static_cast("problem type")); + std::string problem_type = problemParams.get(static_cast("problem type")); // Parameters @@ -182,14 +181,13 @@ int main(int argc, char *argv[]) { MueLu::DomainPartitioning domain; int keep_boundary = 0; - Scalar stretchx = (Scalar) Lx/nx; - Scalar stretchy = (Scalar) Ly/ny; - Scalar stretchz = (Scalar) Lz/nz; + Scalar stretchx = (Scalar)Lx / nx; + Scalar stretchy = (Scalar)Ly / ny; + Scalar stretchz = (Scalar)Lz / nz; + ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary, stretchx, stretchy, stretchz); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - ADR::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, problem_type, keep_boundary , stretchx, stretchy, stretchz); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra - if (xpetraParameters.GetLib() == Xpetra::UseEpetra) { throw std::invalid_argument("This example only supports Tpetra."); } @@ -202,226 +200,206 @@ int main(int argc, char *argv[]) { RCP xpetraMap; std::vector ind; - //BRICK SIZE + // 
BRICK SIZE int brick_sizex = mueluParams.get(static_cast("aggregation: brick x size")); int brick_sizey = mueluParams.get(static_cast("aggregation: brick y size")); int brick_sizez = mueluParams.get(static_cast("aggregation: brick z size")); - //Creation of the map where processor 0 gets nothing at the fine level - if( comm->getSize()>1 ) - { - if(problem_type == "ADR1D") - createBrickMap1D(matrixParameters.GetNumGlobalElements(), ind, comm ); + // Creation of the map where processor 0 gets nothing at the fine level + if (comm->getSize() > 1) { + if (problem_type == "ADR1D") + createBrickMap1D(matrixParameters.GetNumGlobalElements(), ind, comm); - else if(problem_type == "ADR2D") - createBrickMap2D( nx, brick_sizex, brick_sizey, ind, comm ); + else if (problem_type == "ADR2D") + createBrickMap2D(nx, brick_sizex, brick_sizey, ind, comm); - else if(problem_type == "ADR3D") - createBrickMap3D( nx, ny, brick_sizex, brick_sizey, brick_sizez, ind, comm ); + else if (problem_type == "ADR3D") + createBrickMap3D(nx, ny, brick_sizex, brick_sizey, brick_sizez, ind, comm); - ind.shrink_to_fit(); + ind.shrink_to_fit(); - Teuchos::ArrayView elementList (ind); - xpetraMap = MapFactory::Build(Xpetra::UseTpetra,matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); - } - else if( comm->getSize()==1 ) - { - xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); + Teuchos::ArrayView elementList(ind); + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), elementList, indexBase, comm); + } else if (comm->getSize() == 1) { + xpetraMap = MapFactory::Build(Xpetra::UseTpetra, matrixParameters.GetNumGlobalElements(), indexBase, comm); } RCP coordinates; - if (problem_type == "ADR1D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); - else if (problem_type == "ADR2D") - coordinates = 
Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); - else if (problem_type == "ADR3D") - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); - - RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); - RCP xpetraA = Pr->BuildMatrix(); - - RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); - RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); - - // =================================================== - // Domain Decomposition Preconditioner - // =================================== - - //Creation of the MueLu list for the DD preconditioner - RCP dd_list = rcp(new Teuchos::ParameterList()); - dd_list->setName("MueLu"); - dd_list->set("verbosity", "low"); - dd_list->set("number of equations", 1); - dd_list->set("max levels", 1); - dd_list->set("coarse: type", "SCHWARZ"); //FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL - - Teuchos::ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); - dd_smooth_sublist.set("schwarz: overlap level", 0); - dd_smooth_sublist.set("schwarz: combine mode", "Zero"); - dd_smooth_sublist.set("subdomain solver name", "RILUK"); - - Teuchos::ParameterList& coarse_subdomain_solver = dd_smooth_sublist.sublist("subdomain solver parameters"); - coarse_subdomain_solver.set("fact: iluk level-of-fill", 3); - coarse_subdomain_solver.set("fact: absolute thresh/old", 0.); - coarse_subdomain_solver.set("fact: relative threshold", 1.); - coarse_subdomain_solver.set("fact: relax value", 0.); - - RCP B_DD = MueLu::CreateTpetraPreconditioner( (RCP)A, *dd_list ); - - // Construct a multigrid preconditioner - RCP M; - - //Manual set up of the prolongation and restriction - MueLu::ParameterListInterpreter mueLuFactory( mueluParams ); - RCP > H = mueLuFactory.CreateHierarchy(); - H->setVerbLevel(Teuchos::VERB_HIGH); - - 
H->GetLevel(0)->Set("A", xpetraA); - H->GetLevel(0)->Set("Coordinates", coordinates); - - // Multigrid setup phase - mueLuFactory.SetupHierarchy(*H); - - RCP L = H->GetLevel(1); - - RCP> prolong, restr; - - if (L->IsAvailable("P")) - prolong = L->template Get< RCP> >("P"); - - if (L->IsAvailable("R")) - restr = L->template Get< RCP> >("R"); - - - RCP tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong); - RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); - - #include - RCP PbarSetUp = Teuchos::TimeMonitor::getNewCounter("Pbar: SetUp"); - PbarSetUp->start(); - RCP mueluPbar; - - // We have to transform P into a condensed multivector - RCP identity_shrunk = rcp( new multivector_type(tpetra_prolong->getDomainMap(), std::pow(3, coordinates->getNumVectors()) ) ); - Teuchos::ArrayView myIdentityGlobalElements = tpetra_prolong->getDomainMap()->getLocalElementList(); - typedef typename Teuchos::ArrayView::const_iterator iter_type; - - int number_runs = 1; - -for(int trial = 1; trial<=number_runs; ++trial) -{ - if(1 == coordinates->getNumVectors()) - { - - for(int j = 0; j<3; ++j) - { - int color = j; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - int local_color = (i_local)%3; - - if( local_color == color ) - localMV[aux] = 1.0; - - } - } - } - else if(2 == coordinates->getNumVectors()) - { - - for(int j = 0; j<9; ++j) - { - int color = j; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - const local_ordinal_type local_color = coloring2D( 
i_local+1, std::sqrt( tpetra_prolong->getGlobalNumCols() ) ); - - if( local_color == color ) - { - localMV[aux] = 1.0; - //std::cout<<"ID: "<getNumVectors()) - { - //const local_ordinal_type local_color = coloring3D( mypid, std::cbrt( tpetra_prolong->getGlobalNumCols() ), std::cbrt( tpetra_prolong->getGlobalNumCols() ) ); - - for(int j = 0; j<27; ++j) - { - int color = j; - - Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); - - for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { - const local_ordinal_type i_local = *it; - const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement (i_local); - const local_ordinal_type local_color = coloring3D( i_local+1, std::cbrt( tpetra_prolong->getGlobalNumCols() ), std::cbrt( tpetra_prolong->getGlobalNumCols() ) ); - - if( local_color == color ) - { - //std::cout<<"i_local: "< P_shrunk = rcp( new multivector_type( tpetra_prolong->getRangeMap(), std::pow(3, coordinates->getNumVectors()) ) ); - tpetra_prolong->apply(*identity_shrunk, *P_shrunk); - RCP AP_shrunk = rcp( new multivector_type( A->getRangeMap(), std::pow(3, coordinates->getNumVectors()) ) ); - A->apply(*P_shrunk, *AP_shrunk); - - //======================================================================================================== - - // CREATION OF BAP - - RCP BAP_shrunk = rcp(new multivector_type(B_DD->getRangeMap(),AP_shrunk->getNumVectors())); - B_DD->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits< scalar_type >::one(), Teuchos::ScalarTraits< scalar_type >::zero()); - - //Columns Map for BAP - std::vector indBAPcolMap; - if(1 == coordinates->getNumVectors()) - neighbours1D( tpetra_prolong, indBAPcolMap, comm ); - else if(2 == coordinates->getNumVectors()) - neighbours2D( tpetra_prolong, indBAPcolMap, comm, std::sqrt(tpetra_prolong->getGlobalNumCols()) ); - else if(3 == coordinates->getNumVectors()) - neighbours3D( tpetra_prolong, indBAPcolMap, comm, 
std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols()) ); - - Teuchos::ArrayView elementListBAP (indBAPcolMap); - Teuchos::RCP< Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > BAPcolMap = rcp( new Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type >( static_cast(tpetra_prolong->getGlobalNumCols()), elementListBAP, indexBase, comm ) ); - - RCP BAP = rcp( new crs_matrix_type( tpetra_prolong->getRowMap(), BAPcolMap, tpetra_prolong->getGlobalNumCols() ) ); - Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); + if (problem_type == "ADR1D") + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", xpetraMap, matrixParameters.GetParameterList()); + else if (problem_type == "ADR2D") + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", xpetraMap, matrixParameters.GetParameterList()); + else if (problem_type == "ADR3D") + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", xpetraMap, matrixParameters.GetParameterList()); + + RCP Pr = ADR::Xpetra::BuildProblem(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList()); + RCP xpetraA = Pr->BuildMatrix(); + + RCP A = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA); + RCP map = MueLuUtilities::Map2TpetraMap(*xpetraMap); + + // =================================================== + // Domain Decomposition Preconditioner + // =================================== + + // Creation of the MueLu list for the DD preconditioner + RCP dd_list = rcp(new Teuchos::ParameterList()); + dd_list->setName("MueLu"); + dd_list->set("verbosity", "low"); + dd_list->set("number of equations", 1); + dd_list->set("max levels", 1); + dd_list->set("coarse: type", "SCHWARZ"); // FOR A ONE LEVEL PRECONDITIONER THE COARSE LEVEL IS INTERPRETED AS SMOOTHING LEVEL + + Teuchos::ParameterList& dd_smooth_sublist = dd_list->sublist("coarse: params"); + dd_smooth_sublist.set("schwarz: overlap level", 
0); + dd_smooth_sublist.set("schwarz: combine mode", "Zero"); + dd_smooth_sublist.set("subdomain solver name", "RILUK"); + + Teuchos::ParameterList& coarse_subdomain_solver = dd_smooth_sublist.sublist("subdomain solver parameters"); + coarse_subdomain_solver.set("fact: iluk level-of-fill", 3); + coarse_subdomain_solver.set("fact: absolute thresh/old", 0.); + coarse_subdomain_solver.set("fact: relative threshold", 1.); + coarse_subdomain_solver.set("fact: relax value", 0.); + + RCP B_DD = MueLu::CreateTpetraPreconditioner((RCP)A, *dd_list); + + // Construct a multigrid preconditioner + RCP M; + + // Manual set up of the prolongation and restriction + MueLu::ParameterListInterpreter mueLuFactory(mueluParams); + RCP> H = mueLuFactory.CreateHierarchy(); + H->setVerbLevel(Teuchos::VERB_HIGH); + + H->GetLevel(0)->Set("A", xpetraA); + H->GetLevel(0)->Set("Coordinates", coordinates); + + // Multigrid setup phase + mueLuFactory.SetupHierarchy(*H); + + RCP L = H->GetLevel(1); + + RCP> prolong, restr; + + if (L->IsAvailable("P")) + prolong = L->template Get>>("P"); + + if (L->IsAvailable("R")) + restr = L->template Get>>("R"); + + RCP tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong); + RCP tpetra_restr = MueLuUtilities::Op2NonConstTpetraCrs(restr); + +#include + RCP PbarSetUp = Teuchos::TimeMonitor::getNewCounter("Pbar: SetUp"); + PbarSetUp->start(); + RCP mueluPbar; + + // We have to transform P into a condensed multivector + RCP identity_shrunk = rcp(new multivector_type(tpetra_prolong->getDomainMap(), std::pow(3, coordinates->getNumVectors()))); + Teuchos::ArrayView myIdentityGlobalElements = tpetra_prolong->getDomainMap()->getLocalElementList(); + typedef typename Teuchos::ArrayView::const_iterator iter_type; + + int number_runs = 1; + + for (int trial = 1; trial <= number_runs; ++trial) { + if (1 == coordinates->getNumVectors()) { + for (int j = 0; j < 3; ++j) { + int color = j; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + 
for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + int local_color = (i_local) % 3; + + if (local_color == color) + localMV[aux] = 1.0; + } + } + } else if (2 == coordinates->getNumVectors()) { + for (int j = 0; j < 9; ++j) { + int color = j; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + const local_ordinal_type local_color = coloring2D(i_local + 1, std::sqrt(tpetra_prolong->getGlobalNumCols())); + + if (local_color == color) { + localMV[aux] = 1.0; + // std::cout<<"ID: "<getNumVectors()) { + // const local_ordinal_type local_color = coloring3D( mypid, std::cbrt( tpetra_prolong->getGlobalNumCols() ), std::cbrt( tpetra_prolong->getGlobalNumCols() ) ); + + for (int j = 0; j < 27; ++j) { + int color = j; + + Teuchos::ArrayRCP localMV = identity_shrunk->getDataNonConst(color); + + for (iter_type it = myIdentityGlobalElements.begin(); it != myIdentityGlobalElements.end(); ++it) { + const local_ordinal_type i_local = *it; + const local_ordinal_type aux = identity_shrunk->getMap()->getLocalElement(i_local); + const local_ordinal_type local_color = coloring3D(i_local + 1, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols())); + + if (local_color == color) { + // std::cout<<"i_local: "< P_shrunk = rcp(new multivector_type(tpetra_prolong->getRangeMap(), std::pow(3, coordinates->getNumVectors()))); + tpetra_prolong->apply(*identity_shrunk, *P_shrunk); + RCP AP_shrunk = rcp(new multivector_type(A->getRangeMap(), std::pow(3, coordinates->getNumVectors()))); + A->apply(*P_shrunk, *AP_shrunk); + + 
//======================================================================================================== + + // CREATION OF BAP + RCP BAP_shrunk = rcp(new multivector_type(B_DD->getRangeMap(), AP_shrunk->getNumVectors())); + B_DD->apply(*AP_shrunk, *BAP_shrunk, Teuchos::NO_TRANS, Teuchos::ScalarTraits::one(), Teuchos::ScalarTraits::zero()); - if(1 == coordinates->getNumVectors()) + // Columns Map for BAP + std::vector indBAPcolMap; + if (1 == coordinates->getNumVectors()) + neighbours1D(tpetra_prolong, indBAPcolMap, comm); + else if (2 == coordinates->getNumVectors()) + neighbours2D(tpetra_prolong, indBAPcolMap, comm, std::sqrt(tpetra_prolong->getGlobalNumCols())); + else if (3 == coordinates->getNumVectors()) + neighbours3D(tpetra_prolong, indBAPcolMap, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols())); + + Teuchos::ArrayView elementListBAP(indBAPcolMap); + Teuchos::RCP> BAPcolMap = rcp(new Tpetra::Map(static_cast(tpetra_prolong->getGlobalNumCols()), elementListBAP, indexBase, comm)); + + RCP BAP = rcp(new crs_matrix_type(tpetra_prolong->getRowMap(), BAPcolMap, tpetra_prolong->getGlobalNumCols())); + Teuchos::ArrayView myLocalElements = BAP->getRowMap()->getLocalElementList(); + + if (1 == coordinates->getNumVectors()) BAP1D(BAP, tpetra_prolong, BAP_shrunk, comm); - else if(2 == coordinates->getNumVectors()) - BAP2D( BAP, tpetra_prolong, BAP_shrunk, comm, std::sqrt(tpetra_prolong->getGlobalNumCols()) ); - else if(3 == coordinates->getNumVectors()) - BAP3D( BAP, tpetra_prolong, BAP_shrunk, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols()) ); - - BAP->fillComplete( tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap() ); - - //Tpetra::MatrixMarket::Writer::writeSparseFile("BAP.mtx", BAP); -//============================================================================================================= - RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, 
*tpetra_prolong, -1.0, false, *BAP); - mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); -} -PbarSetUp->stop(); + else if (2 == coordinates->getNumVectors()) + BAP2D(BAP, tpetra_prolong, BAP_shrunk, comm, std::sqrt(tpetra_prolong->getGlobalNumCols())); + else if (3 == coordinates->getNumVectors()) + BAP3D(BAP, tpetra_prolong, BAP_shrunk, comm, std::cbrt(tpetra_prolong->getGlobalNumCols()), std::cbrt(tpetra_prolong->getGlobalNumCols())); + + BAP->fillComplete(tpetra_prolong->getDomainMap(), tpetra_prolong->getRangeMap()); + + // Tpetra::MatrixMarket::Writer::writeSparseFile("BAP.mtx", BAP); + //============================================================================================================= + RCP Pbar = Tpetra::MatrixMatrix::add(1.0, false, *tpetra_prolong, -1.0, false, *BAP); + mueluPbar = MueLu::TpetraCrs_To_XpetraMatrix(Pbar); + } + PbarSetUp->stop(); H->GetLevel(1)->Set("Pbar", mueluPbar); @@ -429,70 +407,68 @@ PbarSetUp->stop(); M = rcp(new muelu_tpetra_operator_type(H)); - RCP X = rcp(new multivector_type(map,1)); - RCP B = rcp(new multivector_type(map,1)); - -for(int trial = 1; trial<=number_runs; ++trial) -{ - - X->putScalar((scalar_type) 0.0); - B->randomize(); - - // - // Set up Krylov solver and iterate. 
- // - - RCP X_muelu = rcp(new multivector_type(map,1)); - RCP Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); - Problem_muelu->setRightPrec(M); - Problem_muelu->setProblem(); - - RCP belosList = rcp(new Teuchos::ParameterList()); - belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested - //belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList->set("Verbosity", Belos::Errors); - belosList->set("Output Frequency", 1); - belosList->set("Output Style", Belos::Brief); - belosList->set("Implicit Residual Scaling", "None"); - RCP solver; - if (krylovSolverType == "cg") - solver = rcp(new belos_pseudocg_manager_type(Problem_muelu, belosList)); - else if (krylovSolverType == "gmres") - solver = rcp(new belos_gmres_manager_type(Problem_muelu, belosList)); - else if (krylovSolverType == "bicgstab") - solver = rcp(new belos_bicgstab_manager_type(Problem_muelu, belosList)); - else - throw std::invalid_argument("bad Krylov solver type"); - - solver->solve(); - int numIterations_muelu = solver->getNumIters(); - - Teuchos::Array::magnitudeType> normVec_muelu(1); - multivector_type residual_muelu(B->getMap(),1); - A->apply(*X_muelu, residual_muelu); - residual_muelu.update(1.0, *B, -1.0); - residual_muelu.norm2(normVec_muelu); - if (mypid == 0) { - std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; - std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; - } -} + RCP X = rcp(new multivector_type(map, 1)); + RCP B = rcp(new multivector_type(map, 1)); + + for (int trial = 1; trial <= number_runs; ++trial) { + X->putScalar((scalar_type)0.0); + B->randomize(); + + // + // Set up Krylov solver and iterate. 
+ // + + RCP X_muelu = rcp(new multivector_type(map, 1)); + RCP Problem_muelu = rcp(new linear_problem_type(A, X_muelu, B)); + Problem_muelu->setRightPrec(M); + Problem_muelu->setProblem(); + + RCP belosList = rcp(new Teuchos::ParameterList()); + belosList->set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList->set("Convergence Tolerance", tol); // Relative convergence tolerance requested + // belosList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList->set("Verbosity", Belos::Errors); + belosList->set("Output Frequency", 1); + belosList->set("Output Style", Belos::Brief); + belosList->set("Implicit Residual Scaling", "None"); + RCP solver; + if (krylovSolverType == "cg") + solver = rcp(new belos_pseudocg_manager_type(Problem_muelu, belosList)); + else if (krylovSolverType == "gmres") + solver = rcp(new belos_gmres_manager_type(Problem_muelu, belosList)); + else if (krylovSolverType == "bicgstab") + solver = rcp(new belos_bicgstab_manager_type(Problem_muelu, belosList)); + else + throw std::invalid_argument("bad Krylov solver type"); + + solver->solve(); + int numIterations_muelu = solver->getNumIters(); + + Teuchos::Array::magnitudeType> normVec_muelu(1); + multivector_type residual_muelu(B->getMap(), 1); + A->apply(*X_muelu, residual_muelu); + residual_muelu.update(1.0, *B, -1.0); + residual_muelu.norm2(normVec_muelu); + if (mypid == 0) { + std::cout << "number of iterations with MueLu preconditioner= " << numIterations_muelu << std::endl; + std::cout << "||Residual|| = " << normVec_muelu[0] << std::endl; + } + } - //Teuchos::TimeMonitor::summarize (); + // Teuchos::TimeMonitor::summarize (); - //Columns Map for BAP - std::vector indTimerMap; - if(0!=mypid) - indTimerMap.emplace_back(mypid-1); + // Columns Map for BAP + std::vector indTimerMap; + if (0 != mypid) + indTimerMap.emplace_back(mypid - 1); - Teuchos::ArrayView elementListTimer (indTimerMap); - Teuchos::RCP< Tpetra::Map< 
local_ordinal_type, global_ordinal_type, node_type > > TimerMap = rcp( new Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type >( static_cast(comm->getSize()), elementListTimer, indexBase, comm ) ); + Teuchos::ArrayView elementListTimer(indTimerMap); + Teuchos::RCP> TimerMap = rcp(new Tpetra::Map(static_cast(comm->getSize()), elementListTimer, indexBase, comm)); - RCP TimerRestr = rcp(new multivector_type(TimerMap,1)); - RCP TimerProlong = rcp(new multivector_type(TimerMap,1)); - RCP TimerFine = rcp(new multivector_type(TimerMap,1)); - RCP TimerCoarse = rcp(new multivector_type(TimerMap,1)); + RCP TimerRestr = rcp(new multivector_type(TimerMap, 1)); + RCP TimerProlong = rcp(new multivector_type(TimerMap, 1)); + RCP TimerFine = rcp(new multivector_type(TimerMap, 1)); + RCP TimerCoarse = rcp(new multivector_type(TimerMap, 1)); return EXIT_SUCCESS; } diff --git a/packages/muelu/research/max/XpetraSplitting/Test_muelu.cpp b/packages/muelu/research/max/XpetraSplitting/Test_muelu.cpp index 271490c23681..c06dabe58572 100644 --- a/packages/muelu/research/max/XpetraSplitting/Test_muelu.cpp +++ b/packages/muelu/research/max/XpetraSplitting/Test_muelu.cpp @@ -40,53 +40,46 @@ #include #include - // =========== // // main driver // // =========== // -int main(int argc, char* argv[]) -{ - - - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; - typedef global_ordinal_type GlobalOrdinal; +int main(int argc, char* argv[]) { + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; + typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; + typedef global_ordinal_type GlobalOrdinal; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; - typedef Xpetra::Matrix Matrix; - typedef Xpetra::MatrixSplitting MatrixSplitting; + typedef Xpetra::Matrix Matrix; + typedef Xpetra::MatrixSplitting 
MatrixSplitting; typedef Xpetra::CrsMatrixWrap EpCrsMatrix; - - TEUCHOS_TEST_FOR_EXCEPT_MSG(argc<2, "\nInvalid name for input matrix\n"); + TEUCHOS_TEST_FOR_EXCEPT_MSG(argc < 2, "\nInvalid name for input matrix\n"); int numGlobalElements = 1; Teuchos::RCP > comm = Teuchos::DefaultComm::getComm(); - //Create Xpetra map - Teuchos::RCP > xpetraMap; - xpetraMap = Xpetra::MapFactory::Build(Xpetra::UseEpetra, numGlobalElements, 0, comm); + // Create Xpetra map + Teuchos::RCP > xpetraMap; + xpetraMap = Xpetra::MapFactory::Build(Xpetra::UseEpetra, numGlobalElements, 0, comm); - //Import matrix from an .mtx file into an Xpetra wrapper for an Epetra matrix - Teuchos::RCP > xpetraMatrix = Xpetra::IO::Read(argv[1], Xpetra::UseEpetra, comm); - //Export matrix from an Xpetra wrapper into an .mtx file - Xpetra::IO::Write("A_write.mtx", *xpetraMatrix); + // Import matrix from an .mtx file into an Xpetra wrapper for an Epetra matrix + Teuchos::RCP > xpetraMatrix = Xpetra::IO::Read(argv[1], Xpetra::UseEpetra, comm); + // Export matrix from an Xpetra wrapper into an .mtx file + Xpetra::IO::Write("A_write.mtx", *xpetraMatrix); Teuchos::RCP xpetraMatrixSplitting; Teuchos::ParameterList xmlParams; - Teuchos::RCP > Hierarchy = MueLu::CreateXpetraPreconditioner( (Teuchos::RCP)xpetraMatrixSplitting, xmlParams ); - + Teuchos::RCP > Hierarchy = MueLu::CreateXpetraPreconditioner((Teuchos::RCP)xpetraMatrixSplitting, xmlParams); #ifdef HAVE_MPI MPI_Finalize(); #endif - return(EXIT_SUCCESS); + return (EXIT_SUCCESS); } - diff --git a/packages/muelu/research/max/XpetraSplitting/Test_xpetra.cpp b/packages/muelu/research/max/XpetraSplitting/Test_xpetra.cpp index 287f2aed5646..e712344b5f95 100644 --- a/packages/muelu/research/max/XpetraSplitting/Test_xpetra.cpp +++ b/packages/muelu/research/max/XpetraSplitting/Test_xpetra.cpp @@ -36,25 +36,21 @@ #include "Epetra_CrsMatrix.h" #include "Epetra_LinearProblem.h" - // =========== // // main driver // // =========== // -int main(int argc, char* argv[]) -{ 
- - - typedef double scalar_type; - typedef int local_ordinal_type; - typedef int global_ordinal_type; - typedef scalar_type Scalar; - typedef local_ordinal_type LocalOrdinal; - typedef global_ordinal_type GlobalOrdinal; +int main(int argc, char* argv[]) { + typedef double scalar_type; + typedef int local_ordinal_type; + typedef int global_ordinal_type; + typedef scalar_type Scalar; + typedef local_ordinal_type LocalOrdinal; + typedef global_ordinal_type GlobalOrdinal; typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; - typedef Xpetra::MultiVector multivector_type; - typedef Xpetra::MatrixSplitting tpetra_splitting; + typedef Xpetra::MultiVector multivector_type; + typedef Xpetra::MatrixSplitting tpetra_splitting; #ifdef HAVE_MPI MPI_Init(&argc, &argv); @@ -69,13 +65,13 @@ int main(int argc, char* argv[]) // // with Matrix arising from a 5-point formula discretization. - TEUCHOS_TEST_FOR_EXCEPT_MSG(argc<4, "\nInvalid name for input matrix and output file\n"); + TEUCHOS_TEST_FOR_EXCEPT_MSG(argc < 4, "\nInvalid name for input matrix and output file\n"); Teuchos::RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // process command line arguments - const char* xmlFileName = argv[1]; - const char* matrixFileName = argv[2]; + const char* xmlFileName = argv[1]; + const char* matrixFileName = argv[2]; const char* mappingFileName = argv[3]; Teuchos::ParameterList xmlParams; @@ -84,52 +80,49 @@ int main(int argc, char* argv[]) Teuchos::RCP > comm = Teuchos::DefaultComm::getComm(); if (CommEpetra.MyPID() == 0) - std::cout<<"Number of processors: "< driver("node.txt", comm); - Teuchos::Array elementlist = driver.GetGlobalRowMap(); - driver.printInactive(); - Xpetra::MatrixSplitting xpetraWrapper( argv[1], argv[2], comm ); - std::string output_file="A_write.mtx"; - xpetraWrapper.writeGlobalMatrix(); - xpetraWrapper.writeRegionMatrices();*/ - + Teuchos::Array elementlist = driver.GetGlobalRowMap(); + driver.printInactive(); + 
Xpetra::MatrixSplitting xpetraWrapper( argv[1], argv[2], comm ); + std::string output_file="A_write.mtx"; + xpetraWrapper.writeGlobalMatrix(); + xpetraWrapper.writeRegionMatrices();*/ - //Teuchos::RCP > A; - //A = Xpetra::IO::Read(argv[2], Xpetra::UseTpetra, comm); + // Teuchos::RCP > A; + // A = Xpetra::IO::Read(argv[2], Xpetra::UseTpetra, comm); // Create the RegionHandler to deal with mappings of nodes to regions etc. - Teuchos::RCP > regionHandler = Teuchos::rcp(new Xpetra::RegionHandler (mappingFileName, comm)); - Teuchos::Array elementlist = regionHandler->GetGlobalRowMap(); - std::size_t num_total_elements = regionHandler->GetNumGlobalElements(); - std::size_t num_total_regions = regionHandler->GetNumTotalRegions(); + Teuchos::RCP > regionHandler = Teuchos::rcp(new Xpetra::RegionHandler(mappingFileName, comm)); + Teuchos::Array elementlist = regionHandler->GetGlobalRowMap(); + std::size_t num_total_elements = regionHandler->GetNumGlobalElements(); + std::size_t num_total_regions = regionHandler->GetNumTotalRegions(); // Read and split the matrix Teuchos::RCP matrixSplitting = Teuchos::rcp(new tpetra_splitting(matrixFileName, regionHandler, comm)); // Create region-wise AMG hierarchy - int max_num_levels = 4; + int max_num_levels = 4; int coarsening_factor = 3; - Xpetra::RegionAMG preconditioner(matrixSplitting, regionHandler, comm, mueluParams, max_num_levels, coarsening_factor); - -// // Setup vectors for test problem -// Teuchos::RCP X = Xpetra::MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(preconditioner.getDomainMap(), 1) ; -// Teuchos::RCP Y = Xpetra::MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(preconditioner.getRangeMap(), 1) ; -// X->randomize(); -// Y->putScalar((scalar_type) 0.0); -// -// // Apply the preconditioner -// preconditioner.apply(*X,*Y); -// -// // Output result to screen -// Y->describe(*out, Teuchos::VERB_EXTREME); + Xpetra::RegionAMG preconditioner(matrixSplitting, regionHandler, 
comm, mueluParams, max_num_levels, coarsening_factor); + + // // Setup vectors for test problem + // Teuchos::RCP X = Xpetra::MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(preconditioner.getDomainMap(), 1) ; + // Teuchos::RCP Y = Xpetra::MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(preconditioner.getRangeMap(), 1) ; + // X->randomize(); + // Y->putScalar((scalar_type) 0.0); + // + // // Apply the preconditioner + // preconditioner.apply(*X,*Y); + // + // // Output result to screen + // Y->describe(*out, Teuchos::VERB_EXTREME); #ifdef HAVE_MPI MPI_Finalize(); #endif - return(EXIT_SUCCESS); + return (EXIT_SUCCESS); } - diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_decl.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_decl.hpp index 7207c1cae481..49e3b8b48c7b 100644 --- a/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_decl.hpp +++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_decl.hpp @@ -52,119 +52,114 @@ #ifndef XPETRA_LEVEL_DECL_HPP #define XPETRA_LEVEL_DECL_HPP -namespace Xpetra{ +namespace Xpetra { - template::scalar_type, - class LocalOrdinal = typename MultiVector::local_ordinal_type, - class GlobalOrdinal = typename MultiVector::global_ordinal_type, - class Node = typename MultiVector::node_type> - class Level { +template ::scalar_type, + class LocalOrdinal = typename MultiVector::local_ordinal_type, + class GlobalOrdinal = typename MultiVector::global_ordinal_type, + class Node = typename MultiVector::node_type> +class Level { + typedef Map map_type; + typedef Vector vector_type; + typedef MultiVector multivector_type; + typedef Matrix matrix_type; - typedef Map map_type; - typedef Vector vector_type; - typedef MultiVector multivector_type; - typedef Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > matrix_type; + public: + //! 
Constructors/destructors + //@{ + // Constructor + Level(int, int); - public: + // Default destructor + virtual ~Level(){}; - //! Constructors/destructors - //@{ - // Constructor - Level( int, int ); + //@} - //Default destructor - virtual ~Level(){}; + //! Public methods + //@{ + virtual void + apply(const multivector_type& X, multivector_type& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const {}; - //@} + virtual bool hasTransposeApply() const { return false; } - //! Public methods - //@{ - virtual void - apply (const multivector_type& X, multivector_type& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero())const{}; + //! Get methods + //@{ - virtual bool hasTransposeApply() const { return false; } + //! Extract the number of regions instantiated in the level + int GetNumRegions() const; - //! Get methods - //@{ + GlobalOrdinal GetNumRegionNodes(GlobalOrdinal region_idx) const { return regionA_[region_idx]->getCrsGraph()->getGlobalNumRows(); }; - //!Extract the number of regions instantiated in the level - int GetNumRegions() const; + RCP GetRegionMatrix(GlobalOrdinal region_idx) const { return regionA_[region_idx]; }; - GlobalOrdinal GetNumRegionNodes( GlobalOrdinal region_idx)const{return regionA_[region_idx]->getCrsGraph()->getGlobalNumRows();}; + //! Extract regionToAll from the level + Array > > GetRegionToAll() const; - RCP GetRegionMatrix( GlobalOrdinal region_idx)const{return regionA_[region_idx];}; + // Extract the level ID + GlobalOrdinal GetLevelID() const { return levelID_; } - //! Extract regionToAll from the level - Array > > GetRegionToAll() const; + //! 
Take a region index and returns the composite index for a given mesh node + GlobalOrdinal GetCompositeIndex(int, GlobalOrdinal) const; - // Extract the level ID - GlobalOrdinal GetLevelID() const {return levelID_;} + //@} - //! Take a region index and returns the composite index for a given mesh node - GlobalOrdinal GetCompositeIndex(int, GlobalOrdinal)const; + //@} - //@} - - //@} + //! Set up methods + //@{ + //! Set the region galerkin operators + void SetA(Array >&); - //! Set up methods - //@{ - //! Set the region galerkin operators - void SetA( Array >& ); + //! Set the region prolongators + void SetP(Array >&); - //! Set the region prolongators - void SetP( Array >& ); + //! Set the region restrioctions + void SetR(Array >&); - //! Set the region restrioctions - void SetR( Array >& ); + //! Set the region smoothers + void SetSmoother(Array >&); - //! Set the region smoothers - void SetSmoother( Array >& ); + //! Set the regionToAll structure for the current level by using information coming from the finer level + void SetRegionToAll(Array > >); - //! Set the regionToAll structure for the current level by using information coming from the finer level - void SetRegionToAll( Array > > ); + //@} - //@} - - //! Check method - //@{ - //! Control that algebraic quantities have matching dimensions - void checkConsistency() const; - //@} + //! Check method + //@{ + //! Control that algebraic quantities have matching dimensions + void checkConsistency() const; + //@} - //! Constrcution of region smoothers - //@{ - void ComputeRegionJacobi(); - //@} + //! Constrcution of region smoothers + //@{ + void ComputeRegionJacobi(); + //@} - private: + private: + //! Private variables + //@{ + GlobalOrdinal levelID_ = -1; + GlobalOrdinal num_regions_ = -1; - //! Private variables - //@{ - GlobalOrdinal levelID_ = -1; - GlobalOrdinal num_regions_ = -1; + //! Region operators + Array > regionA_; - //! Region operators - Array > regionA_; + //! 
Region grid transfers + Array > regionP_; + Array > regionR_; - //! Region grid transfers - Array > regionP_; - Array > regionR_; + //! Smoother + Array > regionSmoother_; - //! Smoother - Array > regionSmoother_; + //! Auxiliary quantities to handle regions at each coarsening level + Array > > level_regionToAll_; - //! Auxiliary quantities to handle regions at each coarsening level - Array< Array > > level_regionToAll_; + //@} +}; - //@} - - - }; - -} //namespace Xpetra +} // namespace Xpetra #endif diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_def.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_def.hpp index a4436045e78d..c263236c0ee2 100644 --- a/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_def.hpp +++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_Level_def.hpp @@ -54,134 +54,109 @@ #include "Xpetra_Level_decl.hpp" -namespace Xpetra -{ - -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -Level::Level( int levelID, int num_regions ): -levelID_(levelID), -num_regions_(num_regions){} - -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -void Level::SetP( Array >& P ) -{ - TEUCHOS_TEST_FOR_EXCEPTION( regionP_.size()!=0, Exceptions::RuntimeError, "Current level already has prolongators \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( P.size()!=num_regions_, Exceptions::RuntimeError, "Number of region prolongators is "< +Level::Level(int levelID, int num_regions) + : levelID_(levelID) + , num_regions_(num_regions) {} + +template +void Level::SetP(Array >& P) { + TEUCHOS_TEST_FOR_EXCEPTION(regionP_.size() != 0, Exceptions::RuntimeError, "Current level already has prolongators \n"); + TEUCHOS_TEST_FOR_EXCEPTION(P.size() != num_regions_, Exceptions::RuntimeError, "Number of region prolongators is " << P.size() << "and does not math the number of regions which is " << num_regions_ << " \n"); regionP_ = P; } -template< class Scalar, class LocalOrdinal, class 
GlobalOrdinal, class Node > -void Level::SetR( Array >& R ) -{ - TEUCHOS_TEST_FOR_EXCEPTION( regionR_.size()!=0, Exceptions::RuntimeError, "Current level already has restrictions \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( R.size()!=num_regions_, Exceptions::RuntimeError, "Number of region restrictions is "< +void Level::SetR(Array >& R) { + TEUCHOS_TEST_FOR_EXCEPTION(regionR_.size() != 0, Exceptions::RuntimeError, "Current level already has restrictions \n"); + TEUCHOS_TEST_FOR_EXCEPTION(R.size() != num_regions_, Exceptions::RuntimeError, "Number of region restrictions is " << R.size() << "and does not math the number of regions which is " << num_regions_ << " \n"); regionR_ = R; } -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -void Level::SetA( Array >& A ) -{ - TEUCHOS_TEST_FOR_EXCEPTION( regionA_.size()!=0, Exceptions::RuntimeError, "Current level already has operators \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( A.size()!=num_regions_, Exceptions::RuntimeError, "Number of region operators is "< +void Level::SetA(Array >& A) { + TEUCHOS_TEST_FOR_EXCEPTION(regionA_.size() != 0, Exceptions::RuntimeError, "Current level already has operators \n"); + TEUCHOS_TEST_FOR_EXCEPTION(A.size() != num_regions_, Exceptions::RuntimeError, "Number of region operators is " << A.size() << "and does not math the number of regions which is " << num_regions_ << " \n"); regionA_ = A; } -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -void Level::SetSmoother( Array >& S ) -{ - TEUCHOS_TEST_FOR_EXCEPTION( regionSmoother_.size()!=0, Exceptions::RuntimeError, "Current level already has smoothers \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( S.size()!=num_regions_, Exceptions::RuntimeError, "Number of region smoothers is "< +void Level::SetSmoother(Array >& S) { + TEUCHOS_TEST_FOR_EXCEPTION(regionSmoother_.size() != 0, Exceptions::RuntimeError, "Current level already has smoothers \n"); + TEUCHOS_TEST_FOR_EXCEPTION(S.size() != num_regions_, 
Exceptions::RuntimeError, "Number of region smoothers is " << S.size() << "and does not math the number of regions which is " << num_regions_ << " \n"); regionSmoother_ = S; } - -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -void Level::SetRegionToAll( Array > > regionToAll ) -{ - TEUCHOS_TEST_FOR_EXCEPTION( regionToAll.size()!=num_regions_, Exceptions::RuntimeError, "Passed regionToAll has number of regions equal to "< +void Level::SetRegionToAll(Array > > regionToAll) { + TEUCHOS_TEST_FOR_EXCEPTION(regionToAll.size() != num_regions_, Exceptions::RuntimeError, "Passed regionToAll has number of regions equal to " << regionToAll.size() << "which does not match the number of regions in level ID: <<" << levelID_ << " with number of regions equal to " << num_regions_ << "\n"); level_regionToAll_ = regionToAll; } - -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -int Level::GetNumRegions( ) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( num_regions_==0, Exceptions::RuntimeError, "level ID: <<"< +int Level::GetNumRegions() const { + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ == 0, Exceptions::RuntimeError, "level ID: <<" << levelID_ << " does NOT have defined regions \n"); return num_regions_; } -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -Array > > Level::GetRegionToAll( ) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( level_regionToAll_.size()==0, Exceptions::RuntimeError, "level ID: "< +Array > > Level::GetRegionToAll() const { + TEUCHOS_TEST_FOR_EXCEPTION(level_regionToAll_.size() == 0, Exceptions::RuntimeError, "level ID: " << levelID_ << " does NOT have level_regionToAll_ initialized yet \n"); return level_regionToAll_; } - - -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -GlobalOrdinal Level::GetCompositeIndex(int region_idx, GlobalOrdinal region_node_idx ) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( level_regionToAll_.size()==0, Exceptions::RuntimeError, 
"level ID: "<=num_regions_, Exceptions::RuntimeError, "level ID: "< +GlobalOrdinal Level::GetCompositeIndex(int region_idx, GlobalOrdinal region_node_idx) const { + TEUCHOS_TEST_FOR_EXCEPTION(level_regionToAll_.size() == 0, Exceptions::RuntimeError, "level ID: " << levelID_ << " does NOT have level_regionToAll_ initialized yet \n"); + TEUCHOS_TEST_FOR_EXCEPTION(level_regionToAll_.size() != num_regions_, Exceptions::RuntimeError, "level ID: " << levelID_ << " has information stored for a number of regions that does NOT match with the declared number of regions \n"); + TEUCHOS_TEST_FOR_EXCEPTION(region_idx >= num_regions_, Exceptions::RuntimeError, "level ID: " << levelID_ << " Invalid region index \n"); GlobalOrdinal composite_index = -1; checkerRegionToAll unaryPredicate(region_node_idx); - typename Array< std::tuple >::iterator global_iterator; - Array< std::tuple > regionToAll = level_regionToAll_[region_idx]; - global_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); - TEUCHOS_TEST_FOR_EXCEPTION( global_iterator==level_regionToAll_[region_idx].end(), Exceptions::RuntimeError, " - Region: "<( *global_iterator ); + typename Array >::iterator global_iterator; + Array > regionToAll = level_regionToAll_[region_idx]; + global_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); + TEUCHOS_TEST_FOR_EXCEPTION(global_iterator == level_regionToAll_[region_idx].end(), Exceptions::RuntimeError, " - Region: " << region_idx << " - " + << " node with region index: " << region_idx << " is not in regionToAll[" << region_idx << "]" + << "\n"); + composite_index = std::get<1>(*global_iterator); return composite_index; } +template +void Level::checkConsistency() const { + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ <= 0, Exceptions::RuntimeError, "level ID: " << levelID_ << " does not have any regions \n"); + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ != 
regionA_.size(), Exceptions::RuntimeError, "level ID: " << levelID_ << " - number of region matrices " << regionA_.size() << " does NOT match the number of regions " << num_regions_ << "\n"); + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ != level_regionToAll_.size(), Exceptions::RuntimeError, "level ID: " << levelID_ << " - size of level_regionToAll_ " << level_regionToAll_.size() << " does NOT match the number of regions " << num_regions_ << "\n"); -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -void Level::checkConsistency( ) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( num_regions_<=0, Exceptions::RuntimeError, "level ID: "<0 ) - { - TEUCHOS_TEST_FOR_EXCEPTION( num_regions_!=regionP_.size(), Exceptions::RuntimeError, "level ID: "< 0) { + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ != regionP_.size(), Exceptions::RuntimeError, "level ID: " << levelID_ << " - number of prolongators " << regionP_.size() << " does NOT match the number of regions " << num_regions_ << "\n"); + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ != regionR_.size(), Exceptions::RuntimeError, "level ID: " << levelID_ << " - number of restrictions " << regionR_.size() << " does NOT match the number of regions " << num_regions_ << "\n"); } - - for( int region_idx = 0; region_idx0 ) - { - TEUCHOS_TEST_FOR_EXCEPTION( regionP_[region_idx]->getGlobalNumCols()!=regionA_[region_idx]->getGlobalNumCols(), Exceptions::RuntimeError, "level ID: "<getGlobalNumRows()!=regionA_[region_idx]->getGlobalNumCols(), Exceptions::RuntimeError, "level ID: "< 0) { + TEUCHOS_TEST_FOR_EXCEPTION(regionP_[region_idx]->getGlobalNumCols() != regionA_[region_idx]->getGlobalNumCols(), Exceptions::RuntimeError, "level ID: " << levelID_ << " - Region: " << region_idx << " , numCols(P) = " << regionP_[region_idx]->getGlobalNumCols() << " BUT numCols(A) = " << regionA_[region_idx]->getGlobalNumCols() << "\n"); + TEUCHOS_TEST_FOR_EXCEPTION(regionR_[region_idx]->getGlobalNumRows() != 
regionA_[region_idx]->getGlobalNumCols(), Exceptions::RuntimeError, "level ID: " << levelID_ << " - Region: " << region_idx << " , numRows(R) = " << regionR_[region_idx]->getGlobalNumRows() << " BUT numCols(A) = " << regionA_[region_idx]->getGlobalNumCols() << "\n"); } - TEUCHOS_TEST_FOR_EXCEPTION( regionA_[region_idx]->getRowMap()->getLocalNumElements()>0 && level_regionToAll_[region_idx].size()!=regionA_[region_idx]->getGlobalNumRows(), Exceptions::RuntimeError, "Process ID: "<getRowMap()->getComm()->getRank()<<" - level ID: "<getRowMap()->getLocalNumElements() > 0 && level_regionToAll_[region_idx].size() != regionA_[region_idx]->getGlobalNumRows(), Exceptions::RuntimeError, "Process ID: " << regionA_[region_idx]->getRowMap()->getComm()->getRank() << " - level ID: " << levelID_ << " - Region: " << region_idx << " , size(regionToAll) = " << level_regionToAll_[region_idx].size() << " BUT numCols(A) = " << regionA_[region_idx]->getGlobalNumCols() << "\n"); } - } - - -template< class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node > -void Level::ComputeRegionJacobi( ) -{ +template +void Level::ComputeRegionJacobi() { regionSmoother_.resize(num_regions_); - for( int i = 0; i::Build(regionA_[i]->getRowMap()) ; - regionA_[i]->getLocalDiagCopy( *(regionSmoother_[i]) ); + for (int i = 0; i < num_regions_; ++i) { + regionSmoother_[i] = VectorFactory::Build(regionA_[i]->getRowMap()); + regionA_[i]->getLocalDiagCopy(*(regionSmoother_[i])); } - } -} +} // namespace Xpetra #endif diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp index 5b28ded63dd6..c2fd9d96d805 100644 --- a/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp +++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp @@ -52,7 +52,7 @@ #ifndef XPETRA_MATRIXSPLITTING_HPP #define XPETRA_MATRIXSPLITTING_HPP -//Xpetra +// Xpetra #include "Xpetra_Map.hpp" #include 
"Xpetra_MapFactory.hpp" #include "Xpetra_Matrix.hpp" @@ -60,10 +60,10 @@ #include "Xpetra_IO.hpp" #include "Xpetra_RegionHandler_def.hpp" -//Ifpack2 +// Ifpack2 #include "Ifpack2_OverlappingRowMatrix_def.hpp" -//MueLu +// MueLu #include /** \file Xpetra_MatrixSplitting.hpp @@ -72,157 +72,134 @@ Declarations for the class Xpetra::MatrixSplitting. */ namespace Xpetra { - -//Definition of the predicate for the regionToAll structure. -//Given a tuple made of node index and a specific region it belongs to, -//this predicate returns true if the node has composite index which coincides with the index specified in input. -template +// Definition of the predicate for the regionToAll structure. +// Given a tuple made of node index and a specific region it belongs to, +// this predicate returns true if the node has composite index which coincides with the index specified in input. +template class checkerRegionToAll { + public: + // Constructor + checkerRegionToAll(GlobalOrdinal node_index) { node_index_ = node_index; }; -public: - - //Constructor - checkerRegionToAll( GlobalOrdinal node_index){node_index_ = node_index;}; - - //Unary Operator - bool operator()(const std::tuple &node) - { return (std::get<0>(node) == node_index_); } - -private: + // Unary Operator + bool operator()(const std::tuple &node) { return (std::get<0>(node) == node_index_); } + private: GlobalOrdinal node_index_; - }; - -//Definition of the predicate for the node_ structure. -//Given a tuple made of node index and a specific region it belongs to, -//this predicate returns true if the node has composite index which coincides with the index specified in input. -//It does the same thing as checkerRegionToAll but it works on a different data structure -template +// Definition of the predicate for the node_ structure. +// Given a tuple made of node index and a specific region it belongs to, +// this predicate returns true if the node has composite index which coincides with the index specified in input. 
+// It does the same thing as checkerRegionToAll but it works on a different data structure +template class checkerAllToRegion { + public: + // Constructor + checkerAllToRegion(GlobalOrdinal node_index) { node_index_ = node_index; }; -public: - - //Constructor - checkerAllToRegion( GlobalOrdinal node_index){node_index_ = node_index;}; - - //Unary Operator - bool operator()(const std::tuple &node) - { return (std::get<1>(node) == node_index_); } - -private: + // Unary Operator + bool operator()(const std::tuple &node) { return (std::get<1>(node) == node_index_); } + private: GlobalOrdinal node_index_; - }; - -//Definition of the predicate for the node_ structure. -//Given a tuple made of node index and a specific region it belongs to, -//this predicate returns true if the node has composite index which coincides with the index specified in input. -//This checker is specifically used only for nodes lying on the interface -template +// Definition of the predicate for the node_ structure. +// Given a tuple made of node index and a specific region it belongs to, +// this predicate returns true if the node has composite index which coincides with the index specified in input. +// This checker is specifically used only for nodes lying on the interface +template class checkerInterfaceNodes { + public: + // Constructor + checkerInterfaceNodes(GlobalOrdinal node_index) { node_index_ = node_index; }; -public: - - //Constructor - checkerInterfaceNodes( GlobalOrdinal node_index){node_index_ = node_index;}; - - //Unary Operator - bool operator()(const std::tuple > &node) - { return (std::get<0>(node) == node_index_); } - -private: + // Unary Operator + bool operator()(const std::tuple > &node) { return (std::get<0>(node) == node_index_); } + private: GlobalOrdinal node_index_; - }; /*! - @class Xpetra::MatrixSplitting class. - @brief Xpetra-specific matrix class. + @class Xpetra::MatrixSplitting class. + @brief Xpetra-specific matrix class. 
- This class is specific to Xpetra and has no analogue in Epetra or Tpetra. The main motivation for this class is to be able to access matrix data in a manner different than how it is stored. - For example, it might be more convenient to treat ("view") a matrix stored in compressed row storage as if it were a block matrix. The Xpetra::MatrixSplitting class is intended to manage these "views". + This class is specific to Xpetra and has no analogue in Epetra or Tpetra. The main motivation for this class is to be able to access matrix data in a manner different than how it is stored. + For example, it might be more convenient to treat ("view") a matrix stored in compressed row storage as if it were a block matrix. The Xpetra::MatrixSplitting class is intended to manage these "views". - How to create a Matrix from an existing CrsMatrix + How to create a Matrix from an existing CrsMatrix */ typedef std::string viewLabel_t; template ::scalar_type, - class LocalOrdinal = Operator<>::local_ordinal_type, - class GlobalOrdinal = typename Operator::global_ordinal_type, - class Node = typename Operator::node_type, - UnderlyingLib lib = Xpetra::UseEpetra, - bool collapse = false> -class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > { - + class LocalOrdinal = Operator<>::local_ordinal_type, + class GlobalOrdinal = typename Operator::global_ordinal_type, + class Node = typename Operator::node_type, + UnderlyingLib lib = Xpetra::UseEpetra, + bool collapse = false> +class MatrixSplitting : public Matrix { typedef Xpetra::Map Map; - typedef Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > Matrix; + typedef Xpetra::Matrix Matrix; typedef Xpetra::CrsGraph CrsGraph; typedef Xpetra::CrsMatrix CrsMatrix; typedef Xpetra::CrsMatrixWrap CrsMatrixWrap; typedef Xpetra::CrsMatrixFactory CrsMatrixFactory; typedef Xpetra::MatrixView MatrixView; - //Xpetra structures must be converted into Tpetra specialized ones to construct an 
Ifpack2::OverlappingRowMatrix object - //Once the Ifpack2::OverlappingRowMatrix class is transferred into the Xpetra directory, the following 6 lines can be changed/removed + // Xpetra structures must be converted into Tpetra specialized ones to construct an Ifpack2::OverlappingRowMatrix object + // Once the Ifpack2::OverlappingRowMatrix class is transferred into the Xpetra directory, the following 6 lines can be changed/removed typedef Tpetra::CrsMatrix tpetra_crs_matrix; typedef Tpetra::RowMatrix tpetra_row_matrix; -public: - + public: //! @name Constructor/Destructor Methods //@{ //! Constructor specifying fixed number of entries for each row. - MatrixSplitting(RCP matrix, RCP< Array > > nodes) - { - std::cout<<"This version of MatrixSplitting constructor is NOT currently supported \n"; + MatrixSplitting(RCP matrix, RCP > > nodes) { + std::cout << "This version of MatrixSplitting constructor is NOT currently supported \n"; } // // - MatrixSplitting(const char* matrix_file_name, - Teuchos::RCP > regionHandler, - RCP > comm - ) - { - comm_ = comm; - regionHandler_ = regionHandler; + MatrixSplitting(const char *matrix_file_name, + Teuchos::RCP > regionHandler, + RCP > comm) { + comm_ = comm; + regionHandler_ = regionHandler; Array elementlist = regionHandler_->GetGlobalRowMap(); - num_total_elements_ = regionHandler_->GetNumGlobalElements(); - num_total_regions_ = regionHandler_->GetNumTotalRegions(); + num_total_elements_ = regionHandler_->GetNumGlobalElements(); + num_total_regions_ = regionHandler_->GetNumTotalRegions(); - if(comm_->getRank()==0) - std::cout<<"MatrixSplitting constructor initialized"<getRank() == 0) + std::cout << "MatrixSplitting constructor initialized" << std::endl; region_matrix_initialized_.clear(); - for( int i = 0; igetRank()==0) - std::cout<<"Starting construction of Composite Map"< > xpetraMap; - xpetraMap = Xpetra::MapFactory::Build(lib, num_total_elements_, elementlist, 0, comm); - if(comm_->getRank()==0) - std::cout<<"Finished 
construction of Composite Map"<getRank()==0) - std::cout<<"Started reading composite matrix"<::Read(matrix_file_name, xpetraMap); - if(comm_->getRank()==0) - std::cout<<"Finished reading composite matrix"<GetRegionRowMaps() ); + // Create Xpetra map for composite stiffness matrix + if (comm_->getRank() == 0) + std::cout << "Starting construction of Composite Map" << std::endl; + RCP > xpetraMap; + xpetraMap = Xpetra::MapFactory::Build(lib, num_total_elements_, elementlist, 0, comm); + if (comm_->getRank() == 0) + std::cout << "Finished construction of Composite Map" << std::endl; + + if (comm_->getRank() == 0) + std::cout << "Started reading composite matrix" << std::endl; + // Import matrix from an .mm file into an Xpetra wrapper for an Epetra matrix + compositeMatrixData_ = Xpetra::IO::Read(matrix_file_name, xpetraMap); + if (comm_->getRank() == 0) + std::cout << "Finished reading composite matrix" << std::endl; + + CreateRegionMatrices(regionHandler_->GetRegionRowMaps()); } //! Destructor - virtual ~MatrixSplitting() { } + virtual ~MatrixSplitting() {} //@} //! @name Insertion/Removal Methods @@ -263,16 +240,16 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node \note If (globalRow,cols[i]) corresponds to an entry that is duplicated in this matrix row (likely because it was inserted more than once and fillComplete() has not been called in the interim), the behavior of this function is not defined. */ void replaceGlobalValues(GlobalOrdinal globalRow, - const ArrayView &cols, - const ArrayView &vals) { compositeMatrixData_->replaceGlobalValues(globalRow, cols, vals); } + const ArrayView &cols, + const ArrayView &vals) { compositeMatrixData_->replaceGlobalValues(globalRow, cols, vals); } //! Replace matrix entries, using local IDs. /** All index values must be in the local space. Note that if a value is not already present for the specified location in the matrix, the input value will be ignored silently. 
*/ void replaceLocalValues(LocalOrdinal localRow, - const ArrayView &cols, - const ArrayView &vals) { compositeMatrixData_->replaceLocalValues(localRow, cols, vals); } + const ArrayView &cols, + const ArrayView &vals) { compositeMatrixData_->replaceLocalValues(localRow, cols, vals); } //! Set all matrix entries equal to scalar virtual void setAllToScalar(const Scalar &alpha) { compositeMatrixData_->setAllToScalar(alpha); } @@ -288,27 +265,27 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //@{ /*! Resume fill operations. - After calling fillComplete(), resumeFill() must be called before initiating any changes to the matrix. + After calling fillComplete(), resumeFill() must be called before initiating any changes to the matrix. - resumeFill() may be called repeatedly. + resumeFill() may be called repeatedly. - \post isFillActive() == true - \post isFillComplete() == false + \post isFillActive() == true + \post isFillComplete() == false */ - void resumeFill(const RCP< ParameterList > ¶ms=null) { + void resumeFill(const RCP ¶ms = null) { compositeMatrixData_->resumeFill(params); } /*! \brief Signal that data entry is complete, specifying domain and range maps. - Off-node indices are distributed (via globalAssemble()), indices are sorted, redundant indices are eliminated, and global indices are transformed to local indices. + Off-node indices are distributed (via globalAssemble()), indices are sorted, redundant indices are eliminated, and global indices are transformed to local indices. 
- \pre isFillActive() == true - \pre isFillComplete()() == false + \pre isFillActive() == true + \pre isFillComplete()() == false - \post isFillActive() == false - \post isFillComplete() == true - \post if os == DoOptimizeStorage, then isStorageOptimized() == true + \post isFillActive() == false + \post isFillComplete() == true + \post if os == DoOptimizeStorage, then isStorageOptimized() == true */ void fillComplete(const RCP &domainMap, const RCP &rangeMap, const RCP ¶ms = null) { compositeMatrixData_->fillComplete(domainMap, rangeMap, params); @@ -319,18 +296,18 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node /*! \brief Signal that data entry is complete. - Off-node entries are distributed (via globalAssemble()), repeated entries are summed, and global indices are transformed to local indices. + Off-node entries are distributed (via globalAssemble()), repeated entries are summed, and global indices are transformed to local indices. - \note This method calls fillComplete( getRowMap(), getRowMap(), os ). + \note This method calls fillComplete( getRowMap(), getRowMap(), os ). - \pre isFillActive() == true - \pre isFillComplete()() == false + \pre isFillActive() == true + \pre isFillComplete()() == false - \post isFillActive() == false - \post isFillComplete() == true - \post if os == DoOptimizeStorage, then isStorageOptimized() == true + \post isFillActive() == false + \post isFillComplete() == true + \post if os == DoOptimizeStorage, then isStorageOptimized() == true */ - //TODO : Get ride of "Tpetra"::OptimizeOption + // TODO : Get ride of "Tpetra"::OptimizeOption void fillComplete(const RCP ¶ms = null) { compositeMatrixData_->fillComplete(params); @@ -374,7 +351,7 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const { return compositeMatrixData_->getNumEntriesInLocalRow(localRow); } - + //! 
\brief Returns the maximum number of entries across all rows/columns on all nodes. /** Undefined if isFillActive(). */ @@ -391,14 +368,13 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //! \brief Returns the number of regions in the composite domain. global_size_t getNumRegions() const { - TEUCHOS_TEST_FOR_EXCEPTION( region_matrix_initialized_.size()==0 , Exceptions::RuntimeError, "Regions have not been initialized yet \n"); - for( int i = 0; iisLocallyIndexed(); @@ -416,34 +392,33 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //! Extract a list of entries in a specified local row of the matrix. Put into storage allocated by calling routine. /*! - \param LocalRow - (In) Local row number for which indices are desired. - \param Indices - (Out) Local column indices corresponding to values. - \param Values - (Out) Matrix values. - \param NumIndices - (Out) Number of indices. + \param LocalRow - (In) Local row number for which indices are desired. + \param Indices - (Out) Local column indices corresponding to values. + \param Values - (Out) Matrix values. + \param NumIndices - (Out) Number of indices. - Note: A std::runtime_error exception is thrown if either \c Indices or \c Values is not large enough to hold the data associated - with row \c LocalRow. If \c LocalRow is not valid for this node, then \c Indices and \c Values are unchanged and \c NumIndices is - returned as OrdinalTraits::invalid(). + Note: A std::runtime_error exception is thrown if either \c Indices or \c Values is not large enough to hold the data associated + with row \c LocalRow. If \c LocalRow is not valid for this node, then \c Indices and \c Values are unchanged and \c NumIndices is + returned as OrdinalTraits::invalid(). 
- \pre isLocallyIndexed()==true or hasColMap() == true + \pre isLocallyIndexed()==true or hasColMap() == true */ void getLocalRowCopy(LocalOrdinal LocalRow, - const ArrayView &Indices, - const ArrayView &Values, - size_t &NumEntries - ) const { + const ArrayView &Indices, + const ArrayView &Values, + size_t &NumEntries) const { compositeMatrixData_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries); } //! Extract a const, non-persisting view of global indices in a specified row of the matrix. /*! - \param GlobalRow - (In) Global row number for which indices are desired. - \param Indices - (Out) Global column indices corresponding to values. - \param Values - (Out) Row values - \pre isLocallyIndexed() == false - \post indices.size() == getNumEntriesInGlobalRow(GlobalRow) + \param GlobalRow - (In) Global row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Row values + \pre isLocallyIndexed() == false + \post indices.size() == getNumEntriesInGlobalRow(GlobalRow) - Note: If \c GlobalRow does not belong to this node, then \c indices is set to null. + Note: If \c GlobalRow does not belong to this node, then \c indices is set to null. */ void getGlobalRowView(GlobalOrdinal GlobalRow, ArrayView &indices, ArrayView &values) const { compositeMatrixData_->getGlobalRowView(GlobalRow, indices, values); @@ -451,13 +426,13 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //! Extract a const, non-persisting view of local indices in a specified row of the matrix. /*! - \param LocalRow - (In) Local row number for which indices are desired. - \param Indices - (Out) Global column indices corresponding to values. - \param Values - (Out) Row values - \pre isGloballyIndexed() == false - \post indices.size() == getNumEntriesInLocalRow(LocalRow) + \param LocalRow - (In) Local row number for which indices are desired. 
+ \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Row values + \pre isGloballyIndexed() == false + \post indices.size() == getNumEntriesInLocalRow(LocalRow) - Note: If \c LocalRow does not belong to this node, then \c indices is set to null. + Note: If \c LocalRow does not belong to this node, then \c indices is set to null. */ void getLocalRowView(LocalOrdinal LocalRow, ArrayView &indices, ArrayView &values) const { compositeMatrixData_->getLocalRowView(LocalRow, indices, values); @@ -465,8 +440,8 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //! \brief Get a copy of the diagonal entries owned by this node, with local row idices. /*! Returns a distributed Vector object partitioned according to this matrix's row map, containing the - the zero and non-zero diagonals owned by this node. */ - void getLocalDiagCopy(Xpetra::Vector &diag) const { + the zero and non-zero diagonals owned by this node. */ + void getLocalDiagCopy(Xpetra::Vector &diag) const { compositeMatrixData_->getLocalDiagCopy(diag); } @@ -476,8 +451,8 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node } //! Get a copy of the diagonal entries owned by this node, with local row indices, using row offsets. - void getLocalDiagCopy(Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag, const ArrayView &offsets) const { - compositeMatrixData_->getLocalDiagCopy(diag,offsets); + void getLocalDiagCopy(Xpetra::Vector &diag, const ArrayView &offsets) const { + compositeMatrixData_->getLocalDiagCopy(diag, offsets); } //! Get Frobenius norm of the matrix @@ -486,12 +461,12 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node } //! Left scale matrix using the given vector entries - void leftScale (const Vector& x) { + void leftScale(const Vector &x) { compositeMatrixData_->leftScale(x); } //! 
Neighbor2 scale matrix using the given vector entries - void rightScale (const Vector& x) { + void rightScale(const Vector &x) { compositeMatrixData_->rightScale(x); } @@ -507,41 +482,40 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //! \brief Computes the sparse matrix-multivector multiplication. /*! Performs \f$Y = \alpha A^{\textrm{mode}} X + \beta Y\f$, with one special exceptions: - - if beta == 0, apply() overwrites \c Y, so that any values in \c Y (including NaNs) are ignored. + - if beta == 0, apply() overwrites \c Y, so that any values in \c Y (including NaNs) are ignored. */ - virtual void apply(const Xpetra::MultiVector& X, - Xpetra::MultiVector& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = ScalarTraits::one(), - Scalar beta = ScalarTraits::zero()) const { - - compositeMatrixData_->apply(X,Y,mode,alpha,beta); + virtual void apply(const Xpetra::MultiVector &X, + Xpetra::MultiVector &Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = ScalarTraits::one(), + Scalar beta = ScalarTraits::zero()) const { + compositeMatrixData_->apply(X, Y, mode, alpha, beta); } //! \brief Returns the Map associated with the domain of this operator. //! This will be null until fillComplete() is called. - RCP > getDomainMap() const { + RCP > getDomainMap() const { return compositeMatrixData_->getDomainMap(); } //! Returns the Map associated with the domain of this operator. //! This will be null until fillComplete() is called. - RCP > getRangeMap() const { + RCP > getRangeMap() const { return compositeMatrixData_->getRangeMap(); } //! \brief Returns the Map that describes the column distribution in this matrix. //! This might be null until fillComplete() is called. - const RCP & getColMap() const { return getColMap(Matrix::GetCurrentViewLabel()); } + const RCP &getColMap() const { return getColMap(Matrix::GetCurrentViewLabel()); } //! 
\brief Returns the Map that describes the column distribution in this matrix. - const RCP & getColMap(viewLabel_t viewLabel) const { + const RCP &getColMap(viewLabel_t viewLabel) const { TEUCHOS_TEST_FOR_EXCEPTION(Matrix::operatorViewTable_.containsKey(viewLabel) == false, Xpetra::Exceptions::RuntimeError, "Xpetra::Matrix.GetColMap(): view '" + viewLabel + "' does not exist."); - updateDefaultView(); // If CrsMatrix::fillComplete() have been used instead of MatrixSplitting::fillComplete(), the default view is updated. + updateDefaultView(); // If CrsMatrix::fillComplete() have been used instead of MatrixSplitting::fillComplete(), the default view is updated. return Matrix::operatorViewTable_.get(viewLabel)->GetColMap(); } - void removeEmptyProcessesInPlace(const RCP& newMap) { + void removeEmptyProcessesInPlace(const RCP &newMap) { compositeMatrixData_->removeEmptyProcessesInPlace(newMap); this->operatorViewTable_.get(this->GetCurrentViewLabel())->SetRowMap(compositeMatrixData_->getRowMap()); this->operatorViewTable_.get(this->GetCurrentViewLabel())->SetColMap(compositeMatrixData_->getColMap()); @@ -553,45 +527,44 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //{@ //! Access function for the Tpetra::Map this DistObject was constructed with. - const RCP< const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getMap() const { + const RCP > getMap() const { return compositeMatrixData_->getMap(); } //! Import. void doImport(const Matrix &source, - const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &importer, CombineMode CM) { - std::cout<<"Import not implemented"<(source); - //compositeMatrixData_->doImport(*sourceWrp.getCrsMatrix(), importer, CM); + const Xpetra::Import &importer, CombineMode CM) { + std::cout << "Import not implemented" << std::endl; + // const MatrixSplitting & sourceWrp = dynamic_cast(source); + // compositeMatrixData_->doImport(*sourceWrp.getCrsMatrix(), importer, CM); } //! Export. 
void doExport(const Matrix &dest, - const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node >& importer, CombineMode CM) { - std::cout<<"Export not implemented"<(dest); - //compositeMatrixData_->doExport(*destWrp.getCrsMatrix(), importer, CM); + const Xpetra::Import &importer, CombineMode CM) { + std::cout << "Export not implemented" << std::endl; + // const MatrixSplitting & destWrp = dynamic_cast(dest); + // compositeMatrixData_->doExport(*destWrp.getCrsMatrix(), importer, CM); } //! Import (using an Exporter). void doImport(const Matrix &source, - const Xpetra::Export< LocalOrdinal, GlobalOrdinal, Node >& exporter, CombineMode CM) { - std::cout<<"Import not implemented"<(source); - //compositeMatrixData_->doImport(*sourceWrp.getCrsMatrix(), exporter, CM); + const Xpetra::Export &exporter, CombineMode CM) { + std::cout << "Import not implemented" << std::endl; + // const MatrixSplitting & sourceWrp = dynamic_cast(source); + // compositeMatrixData_->doImport(*sourceWrp.getCrsMatrix(), exporter, CM); } //! Export (using an Importer). void doExport(const Matrix &dest, - const Xpetra::Export< LocalOrdinal, GlobalOrdinal, Node >& exporter, CombineMode CM) { - std::cout<<"Export not implemented"<(dest); - //compositeMatrixData_->doExport(*destWrp.getCrsMatrix(), exporter, CM); + const Xpetra::Export &exporter, CombineMode CM) { + std::cout << "Export not implemented" << std::endl; + // const MatrixSplitting & destWrp = dynamic_cast(dest); + // compositeMatrixData_->doExport(*destWrp.getCrsMatrix(), exporter, CM); } // @} - //! @name Overridden from Teuchos::Describable //@{ @@ -601,8 +574,8 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node } /** \brief Print the object with some verbosity level to an FancyOStream object. 
*/ - void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const { - compositeMatrixData_->describe(out,verbLevel); + void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::Describable::verbLevel_default) const { + compositeMatrixData_->describe(out, verbLevel); } //@} @@ -613,58 +586,50 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node RCP getCrsGraph() const { return compositeMatrixData_->getCrsGraph(); } //! Returns an Xpetra::CrsMatrix pointer to the the composite matrix - RCP getCrsMatrix() const { return compositeMatrixData_; } + RCP getCrsMatrix() const { return compositeMatrixData_; } //! Returns an Xpetra::Matrix pointer to the composite matrix - RCP getMatrix() const { return compositeMatrixData_; } + RCP getMatrix() const { return compositeMatrixData_; } //! Returns an Xpetra::Matrix pointer to the region matrix associated with a specific region index - RCP getRegionMatrix( GlobalOrdinal region_idx ) const - { - //The region index is assumed to start from 0 - TEUCHOS_TEST_FOR_EXCEPTION( num_total_regions_<=0, Exceptions::RuntimeError, "Regions not initialized yet ( total number of regions is <=0 ) \n"); - TEUCHOS_TEST_FOR_EXCEPTION( region_idx >= num_total_regions_, Exceptions::RuntimeError, "Region index not valid \n"); + RCP getRegionMatrix(GlobalOrdinal region_idx) const { + // The region index is assumed to start from 0 + TEUCHOS_TEST_FOR_EXCEPTION(num_total_regions_ <= 0, Exceptions::RuntimeError, "Regions not initialized yet ( total number of regions is <=0 ) \n"); + TEUCHOS_TEST_FOR_EXCEPTION(region_idx >= num_total_regions_, Exceptions::RuntimeError, "Region index not valid \n"); - return regionMatrixData_[ region_idx ]; - } + return regionMatrixData_[region_idx]; + } //! 
Return a ppointer to the underlying regionHandler object used for the matrix splitting - RCP > getRegionHandler() const - { + RCP > getRegionHandler() const { return regionHandler_; - } + } //@} //! Write methods //{@ - void writeGlobalMatrix() - { + void writeGlobalMatrix() { std::string file_name; file_name += "./output/A_composite.mm"; - Xpetra::IO::Write(file_name, *compositeMatrixData_); + Xpetra::IO::Write(file_name, *compositeMatrixData_); } - - void writeRegionMatrices() - { - for( int i = 0; i::Write(file_name.c_str(), *regionMatrixData_[i]); + Xpetra::IO::Write(file_name.c_str(), *regionMatrixData_[i]); } } // @} -private: - + private: // Default view is created after fillComplete() // Because ColMap might not be available before fillComplete(). void CreateDefaultView() { - // Create default view this->defaultViewLabel_ = "point"; this->CreateView(this->GetDefaultViewLabel(), compositeMatrixData_->getRowMap(), compositeMatrixData_->getColMap()); @@ -676,7 +641,7 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node // The colMap can be null until fillComplete() is called. The default view of the Matrix have to be updated when fillComplete() is called. // If CrsMatrix::fillComplete() have been used instead of MatrixSplitting::fillComplete(), the default view is updated when getColMap() is called. void updateDefaultView() const { - if ((finalDefaultView_ == false) && compositeMatrixData_->isFillComplete() ) { + if ((finalDefaultView_ == false) && compositeMatrixData_->isFillComplete()) { // Update default view with the colMap Matrix::operatorViewTable_.get(Matrix::GetDefaultViewLabel())->SetColMap(compositeMatrixData_->getColMap()); finalDefaultView_ = true; @@ -688,188 +653,176 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node //! 
Creation of region matrices //@{ - void RegionMatrix(GlobalOrdinal region_idx) - { - TEUCHOS_TEST_FOR_EXCEPTION( num_total_regions_!=regionMatrixData_.size(), Exceptions::RuntimeError, "Number of regions does not match with the size of regionMatrixData_ structure \n"); + void RegionMatrix(GlobalOrdinal region_idx) { + TEUCHOS_TEST_FOR_EXCEPTION(num_total_regions_ != regionMatrixData_.size(), Exceptions::RuntimeError, "Number of regions does not match with the size of regionMatrixData_ structure \n"); RCP region_matrix = regionMatrixData_[region_idx]; - RCP tpetraGlobalMatrix = MueLu::Utilities::Op2NonConstTpetraCrs(compositeMatrixData_); + RCP tpetraGlobalMatrix = MueLu::Utilities::Op2NonConstTpetraCrs(compositeMatrixData_); Ifpack2::OverlappingRowMatrix enlargedMatrix(tpetraGlobalMatrix, 2); region_matrix->resumeFill(); - //Region matrices are initially built to be a chopped version of the composite matrix - InitializeRegionMatrices( region_idx, region_matrix, enlargedMatrix ); + // Region matrices are initially built to be a chopped version of the composite matrix + InitializeRegionMatrices(region_idx, region_matrix, enlargedMatrix); - //If the template paramater is set to collapse by the user, then interface entries of the region matrix are modified to collapse - //information coming from adjacent regions. If the collapsing is not done, then the splitting is calculated - if( collapse ) - RegionCollapse( region_idx, region_matrix, enlargedMatrix ); + // If the template paramater is set to collapse by the user, then interface entries of the region matrix are modified to collapse + // information coming from adjacent regions. If the collapsing is not done, then the splitting is calculated + if (collapse) + RegionCollapse(region_idx, region_matrix, enlargedMatrix); else - RegionSplitting( region_idx, region_matrix, enlargedMatrix ); + RegionSplitting(region_idx, region_matrix, enlargedMatrix); region_matrix->fillComplete(); }; // @} - //! 
@name Initialization of Region matrices //@{ - void InitializeRegionMatrices(GlobalOrdinal region_idx, RCP& region_matrix, Ifpack2::OverlappingRowMatrix& enlargedMatrix) - { - - //Region matrices are initially built to be a chopped version of the composite matrix - TEUCHOS_TEST_FOR_EXCEPTION( region_matrix_initialized_[region_idx], Exceptions::RuntimeError, "Surrogate region stiffness matrices are already initialized by chopping the composite stiffness matrix \n"); + void InitializeRegionMatrices(GlobalOrdinal region_idx, RCP ®ion_matrix, Ifpack2::OverlappingRowMatrix &enlargedMatrix) { + // Region matrices are initially built to be a chopped version of the composite matrix + TEUCHOS_TEST_FOR_EXCEPTION(region_matrix_initialized_[region_idx], Exceptions::RuntimeError, "Surrogate region stiffness matrices are already initialized by chopping the composite stiffness matrix \n"); - Array< std::tuple > regionToAll = regionHandler_->GetRegionToAll(region_idx); + Array > regionToAll = regionHandler_->GetRegionToAll(region_idx); - //THIS IS THE CORE OF THE PROBLEM WHERE ONE NEEDS TO POPULATE THE REGIONAL MATRICES BY ACCESSING ENTRIES OF THE GLOBAL MATRIX + // THIS IS THE CORE OF THE PROBLEM WHERE ONE NEEDS TO POPULATE THE REGIONAL MATRICES BY ACCESSING ENTRIES OF THE GLOBAL MATRIX // - ArrayView MyRegionElements =region_matrix->getRowMap()->getLocalElementList(); - for( typename ArrayView::iterator iter = MyRegionElements.begin(); iter!=MyRegionElements.end(); ++iter ) - { - //Nodes are saved in data structures with 1 as base index - checkerRegionToAll unaryPredicate(*iter+1); - typename Array< std::tuple >::iterator composite_iterator; - composite_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); - TEUCHOS_TEST_FOR_EXCEPTION( composite_iterator==regionToAll.end(), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "<( *composite_iterator ); - LocalOrdinal node_local_idx = 
enlargedMatrix.getRowMap()->getLocalElement(node_idx-1); + ArrayView MyRegionElements = region_matrix->getRowMap()->getLocalElementList(); + for (typename ArrayView::iterator iter = MyRegionElements.begin(); iter != MyRegionElements.end(); ++iter) { + // Nodes are saved in data structures with 1 as base index + checkerRegionToAll unaryPredicate(*iter + 1); + typename Array >::iterator composite_iterator; + composite_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); + TEUCHOS_TEST_FOR_EXCEPTION(composite_iterator == regionToAll.end(), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Region: " << region_idx << " - " + << " node with region index: " << *iter + 1 << " is not in regionToAll[" << region_idx << "]" + << "\n"); + GlobalOrdinal node_idx = std::get<1>(*composite_iterator); + LocalOrdinal node_local_idx = enlargedMatrix.getRowMap()->getLocalElement(node_idx - 1); ArrayView inds; ArrayView vals; - enlargedMatrix.getLocalRowView( node_local_idx, inds, vals ); + enlargedMatrix.getLocalRowView(node_local_idx, inds, vals); std::vector region_inds_vector(0); - std::vector region_vals_vector(0); + std::vector region_vals_vector(0); - for( LocalOrdinal i = 0; i < inds.size(); ++i ) - { - //Nodes are saved in data structures with 1 as base index + for (LocalOrdinal i = 0; i < inds.size(); ++i) { + // Nodes are saved in data structures with 1 as base index GlobalOrdinal composite_col_ind = enlargedMatrix.getColMap()->getGlobalElement(inds[i]) + 1; checkerAllToRegion unaryPredicate2(composite_col_ind); - typename Array< std::tuple >::iterator region_iterator; - region_iterator = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicate2); - if( region_iterator!=regionToAll.end() ) - { - region_inds_vector.push_back( std::get<0>(*region_iterator)-1 ); - region_vals_vector.push_back( vals[i] ); + typename Array >::iterator region_iterator; + 
region_iterator = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicate2); + if (region_iterator != regionToAll.end()) { + region_inds_vector.push_back(std::get<0>(*region_iterator) - 1); + region_vals_vector.push_back(vals[i]); } } ArrayView region_inds(region_inds_vector); ArrayView region_vals(region_vals_vector); - region_matrix -> insertGlobalValues( *iter,region_inds,region_vals ); + region_matrix->insertGlobalValues(*iter, region_inds, region_vals); } region_matrix_initialized_[region_idx] = true; } //@} - //! @name Collapse of External neighbouring nodes information on Region matrices //@{ - void RegionCollapse(GlobalOrdinal region_idx, RCP& region_matrix, Ifpack2::OverlappingRowMatrix& enlargedMatrix) - { - TEUCHOS_TEST_FOR_EXCEPTION( !region_matrix_initialized_[region_idx], Exceptions::RuntimeError, "The composite stiffness matrix must be chopped into surrogate region matrices before collapsing \n"); - TEUCHOS_TEST_FOR_EXCEPTION( regionHandler_->GetNumRegionNodes(region_idx)!=regionMatrixData_[region_idx]->getGlobalNumRows(), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Number of region nodes in region "<getGlobalNumRows() \n"); + void RegionCollapse(GlobalOrdinal region_idx, RCP ®ion_matrix, Ifpack2::OverlappingRowMatrix &enlargedMatrix) { + TEUCHOS_TEST_FOR_EXCEPTION(!region_matrix_initialized_[region_idx], Exceptions::RuntimeError, "The composite stiffness matrix must be chopped into surrogate region matrices before collapsing \n"); + TEUCHOS_TEST_FOR_EXCEPTION(regionHandler_->GetNumRegionNodes(region_idx) != regionMatrixData_[region_idx]->getGlobalNumRows(), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Number of region nodes in region " << region_idx + 1 << " does not coincide with the value returned by regionMatrixData_[" << region_idx + 1 << "]->getGlobalNumRows() \n"); - Array< std::tuple > regionToAll = regionHandler_->GetRegionToAll(region_idx); + Array > 
regionToAll = regionHandler_->GetRegionToAll(region_idx); - //This portion of the code assumes that the number of region nodes is the same on each direction of the domain - //Foir a 2D problem we have then nx = ny = sqrt( num_region_ndoes_ ) + // This portion of the code assumes that the number of region nodes is the same on each direction of the domain + // Foir a 2D problem we have then nx = ny = sqrt( num_region_ndoes_ ) GlobalOrdinal n; GlobalOrdinal nx; GlobalOrdinal ny; - n = regionHandler_->GetNumRegionNodes(region_idx); + n = regionHandler_->GetNumRegionNodes(region_idx); nx = std::sqrt(n); ny = nx; - TEUCHOS_TEST_FOR_EXCEPTION( static_cast( nx - std::floor(static_cast( std::sqrt(static_cast(n)) )))!=0.0 , Exceptions::RuntimeError, "The code assumes that the regions are 2D and that the number of region nodes is the same on each direction of the domain \n"); + TEUCHOS_TEST_FOR_EXCEPTION(static_cast(nx - std::floor(static_cast(std::sqrt(static_cast(n))))) != 0.0, Exceptions::RuntimeError, "The code assumes that the regions are 2D and that the number of region nodes is the same on each direction of the domain \n"); - //interfaceNodes contains nodes on an interface between any regions + // interfaceNodes contains nodes on an interface between any regions Array > > interfaceNodes = regionHandler_->GetInterfaceNodes(); - ArrayView MyRegionElements =region_matrix->getRowMap()->getLocalElementList(); - for( typename ArrayView::iterator iter = MyRegionElements.begin(); iter!=MyRegionElements.end(); ++iter ) - { - - //Nodes are saved in data structures with 1 as base index - GlobalOrdinal region_node_idx = *iter+1; + ArrayView MyRegionElements = region_matrix->getRowMap()->getLocalElementList(); + for (typename ArrayView::iterator iter = MyRegionElements.begin(); iter != MyRegionElements.end(); ++iter) { + // Nodes are saved in data structures with 1 as base index + GlobalOrdinal region_node_idx = *iter + 1; checkerRegionToAll unaryPredicate(region_node_idx); - 
typename Array< std::tuple >::iterator composite_iterator; - composite_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); - TEUCHOS_TEST_FOR_EXCEPTION( composite_iterator==regionToAll.end(), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "<( *composite_iterator ); - checkerInterfaceNodes unaryPredicate2( composite_node_idx ); - typename Array< std::tuple > >::iterator interface_iterator; - interface_iterator = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2); - - //Here we assuming that a specific labeling choice is adopted region wise and we use it to distinguish coarse node from fine nodes + typename Array >::iterator composite_iterator; + composite_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); + TEUCHOS_TEST_FOR_EXCEPTION(composite_iterator == regionToAll.end(), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Region: " << region_idx << " - " + << " node with region index: " << region_node_idx << " is not in regionToAll[" << region_idx << "]" + << "\n"); + + GlobalOrdinal composite_node_idx = std::get<1>(*composite_iterator); + checkerInterfaceNodes unaryPredicate2(composite_node_idx); + typename Array > >::iterator interface_iterator; + interface_iterator = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2); + + // Here we assuming that a specific labeling choice is adopted region wise and we use it to distinguish coarse node from fine nodes bool coarse_point = false; - if( region_node_idx%3==1 ) + if (region_node_idx % 3 == 1) coarse_point = true; GlobalOrdinal region_node_idx_neighbor1 = 0; GlobalOrdinal region_node_idx_neighbor2 = 0; - //Horizontal-Vertical Collapse - if( interface_iterator!=interfaceNodes.end() && region_node_idx>ny && region_node_idx<=(nx-1)*ny 
&& !coarse_point ) - { - region_node_idx_neighbor1 = region_node_idx-ny; - region_node_idx_neighbor2 = region_node_idx+ny; - } - else if( interface_iterator!=interfaceNodes.end() && region_node_idx%ny>1 && !coarse_point ) - { - region_node_idx_neighbor1 = region_node_idx-1; - region_node_idx_neighbor2 = region_node_idx+1; + // Horizontal-Vertical Collapse + if (interface_iterator != interfaceNodes.end() && region_node_idx > ny && region_node_idx <= (nx - 1) * ny && !coarse_point) { + region_node_idx_neighbor1 = region_node_idx - ny; + region_node_idx_neighbor2 = region_node_idx + ny; + } else if (interface_iterator != interfaceNodes.end() && region_node_idx % ny > 1 && !coarse_point) { + region_node_idx_neighbor1 = region_node_idx - 1; + region_node_idx_neighbor2 = region_node_idx + 1; } - if( region_node_idx_neighbor1!=0 && region_node_idx_neighbor2!=0 ) - { - //Computation of composite index for neighbor1 node + if (region_node_idx_neighbor1 != 0 && region_node_idx_neighbor2 != 0) { + // Computation of composite index for neighbor1 node checkerRegionToAll unaryPredicateLeft(region_node_idx_neighbor1); - typename Array< std::tuple >::iterator composite_iterator_neighbor1; - composite_iterator_neighbor1 = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateLeft); + typename Array >::iterator composite_iterator_neighbor1; + composite_iterator_neighbor1 = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateLeft); - //Computation of composite index for neighbor2 node + // Computation of composite index for neighbor2 node checkerRegionToAll unaryPredicateRight(region_node_idx_neighbor2); - typename Array< std::tuple >::iterator composite_iterator_neighbor2; - composite_iterator_neighbor2 = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateRight); - - TEUCHOS_TEST_FOR_EXCEPTION( composite_iterator_neighbor1 == 
regionToAll.end() || composite_iterator_neighbor2 == regionToAll.end(), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "<(*interface_iterator)<<" BUT has compositely mislabeled neighbouring nodes missing from regionToAll \n" ); - - //Check to see if neighbor1 node lies on a coarse line - GlobalOrdinal composite_node_idx_neighbor1 = std::get<1>( *composite_iterator_neighbor1 ); - checkerInterfaceNodes unaryPredicate2neighbor1( composite_node_idx_neighbor1 ); - typename Array< std::tuple > >::iterator interface_iterator_neighbor1; - interface_iterator_neighbor1 = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighbor1); - - //Check to see if neighbor2 node lies on a coarse line - GlobalOrdinal composite_node_idx_neighbor2 = std::get<1>( *composite_iterator_neighbor2 ); - checkerInterfaceNodes unaryPredicate2neighbor2( composite_node_idx_neighbor2 ); - typename Array< std::tuple > >::iterator interface_iterator_neighbor2; - interface_iterator_neighbor2 = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighbor2); - - //I apply the collapse only if the current node is a fine node which lies on a coarse line - //This means that the neighbor1 node and neighbor2 node must both lie on the coarse line as well - if( interface_iterator_neighbor1!=interfaceNodes.end() && interface_iterator_neighbor2!=interfaceNodes.end() ) - { - - //For each fine node on a horixontal coarse line on the interface, I extract the rows from the composite matrix - LocalOrdinal node_idx = enlargedMatrix.getRowMap()->getLocalElement(composite_node_idx-1); - LocalOrdinal node_idx_neighbor1 = enlargedMatrix.getRowMap()->getLocalElement(composite_node_idx_neighbor1-1); - LocalOrdinal node_idx_neighbor2 = enlargedMatrix.getRowMap()->getLocalElement(composite_node_idx_neighbor2-1); + typename Array >::iterator composite_iterator_neighbor2; + 
composite_iterator_neighbor2 = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateRight); + + TEUCHOS_TEST_FOR_EXCEPTION(composite_iterator_neighbor1 == regionToAll.end() || composite_iterator_neighbor2 == regionToAll.end(), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Region: " << region_idx << " - " + << " node with region index: " << region_node_idx << " lies on the interface between regions: " << std::get<1>(*interface_iterator) << " BUT has compositely mislabeled neighbouring nodes missing from regionToAll \n"); + + // Check to see if neighbor1 node lies on a coarse line + GlobalOrdinal composite_node_idx_neighbor1 = std::get<1>(*composite_iterator_neighbor1); + checkerInterfaceNodes unaryPredicate2neighbor1(composite_node_idx_neighbor1); + typename Array > >::iterator interface_iterator_neighbor1; + interface_iterator_neighbor1 = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighbor1); + + // Check to see if neighbor2 node lies on a coarse line + GlobalOrdinal composite_node_idx_neighbor2 = std::get<1>(*composite_iterator_neighbor2); + checkerInterfaceNodes unaryPredicate2neighbor2(composite_node_idx_neighbor2); + typename Array > >::iterator interface_iterator_neighbor2; + interface_iterator_neighbor2 = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighbor2); + + // I apply the collapse only if the current node is a fine node which lies on a coarse line + // This means that the neighbor1 node and neighbor2 node must both lie on the coarse line as well + if (interface_iterator_neighbor1 != interfaceNodes.end() && interface_iterator_neighbor2 != interfaceNodes.end()) { + // For each fine node on a horixontal coarse line on the interface, I extract the rows from the composite matrix + LocalOrdinal node_idx = 
enlargedMatrix.getRowMap()->getLocalElement(composite_node_idx - 1); + LocalOrdinal node_idx_neighbor1 = enlargedMatrix.getRowMap()->getLocalElement(composite_node_idx_neighbor1 - 1); + LocalOrdinal node_idx_neighbor2 = enlargedMatrix.getRowMap()->getLocalElement(composite_node_idx_neighbor2 - 1); ArrayView inds; ArrayView vals; - enlargedMatrix.getLocalRowView( node_idx, inds, vals ); + enlargedMatrix.getLocalRowView(node_idx, inds, vals); ArrayView inds_neighbor1; ArrayView vals_neighbor1; - enlargedMatrix.getLocalRowView( node_idx_neighbor1, inds_neighbor1, vals_neighbor1 ); + enlargedMatrix.getLocalRowView(node_idx_neighbor1, inds_neighbor1, vals_neighbor1); ArrayView inds_neighbor2; ArrayView vals_neighbor2; - enlargedMatrix.getLocalRowView( node_idx_neighbor2, inds_neighbor2, vals_neighbor2 ); + enlargedMatrix.getLocalRowView(node_idx_neighbor2, inds_neighbor2, vals_neighbor2); - std::vector inds_vector = createVector(inds); + std::vector inds_vector = createVector(inds); std::vector inds_neighbor1_vector = createVector(inds_neighbor1); std::vector inds_neighbor2_vector = createVector(inds_neighbor2); @@ -877,282 +830,273 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node std::vector composite_inds_neighbor1_vector(0); std::vector composite_inds_neighbor2_vector(0); - for( typename std::vector::iterator iter_node = inds_vector.begin(); iter_node!=inds_vector.end(); ++iter_node ) - composite_inds_vector.push_back( enlargedMatrix.getRowMap()->getGlobalElement(*iter_node) ); - std::sort( composite_inds_vector.begin(), composite_inds_vector.end() ); + for (typename std::vector::iterator iter_node = inds_vector.begin(); iter_node != inds_vector.end(); ++iter_node) + composite_inds_vector.push_back(enlargedMatrix.getRowMap()->getGlobalElement(*iter_node)); + std::sort(composite_inds_vector.begin(), composite_inds_vector.end()); - for( typename std::vector::iterator iter_node = inds_neighbor1_vector.begin(); 
iter_node!=inds_neighbor1_vector.end(); ++iter_node ) - composite_inds_neighbor1_vector.push_back( enlargedMatrix.getRowMap()->getGlobalElement(*iter_node) ); + for (typename std::vector::iterator iter_node = inds_neighbor1_vector.begin(); iter_node != inds_neighbor1_vector.end(); ++iter_node) + composite_inds_neighbor1_vector.push_back(enlargedMatrix.getRowMap()->getGlobalElement(*iter_node)); - std::sort( composite_inds_neighbor1_vector.begin(), composite_inds_neighbor1_vector.end() ); + std::sort(composite_inds_neighbor1_vector.begin(), composite_inds_neighbor1_vector.end()); - for( typename std::vector::iterator iter_node = inds_neighbor2_vector.begin(); iter_node!=inds_neighbor2_vector.end(); ++iter_node ) - composite_inds_neighbor2_vector.push_back( enlargedMatrix.getRowMap()->getGlobalElement(*iter_node) ); + for (typename std::vector::iterator iter_node = inds_neighbor2_vector.begin(); iter_node != inds_neighbor2_vector.end(); ++iter_node) + composite_inds_neighbor2_vector.push_back(enlargedMatrix.getRowMap()->getGlobalElement(*iter_node)); - std::sort( composite_inds_neighbor2_vector.begin(), composite_inds_neighbor2_vector.end() ); + std::sort(composite_inds_neighbor2_vector.begin(), composite_inds_neighbor2_vector.end()); - //IDENTIFICATION OF EXTERNAL NODES THROUGH COMPOSITE INDICES STARTS HERE + // IDENTIFICATION OF EXTERNAL NODES THROUGH COMPOSITE INDICES STARTS HERE std::vector composite_node_idx_neighbor1_extra; std::vector composite_node_idx_neighbor2_extra; std::vector composite_node_idx_extra(composite_inds_vector); - //The follolwing triple of vector is expected to EVENTUALLY contain only one entry: - //the label of the external node with information to collapse close to neighbor1, neighbor2 and central node + // The follolwing triple of vector is expected to EVENTUALLY contain only one entry: + // the label of the external node with information to collapse close to neighbor1, neighbor2 and central node std::vector diff_neighbor1; std::vector 
diff_neighbor2; std::vector diff_center; - //Identification of external node from the side of neighbor1 + // Identification of external node from the side of neighbor1 { - //Compute the intersection between neoghbourhood of neighbor1 node and neighbourhood of central node + // Compute the intersection between neoghbourhood of neighbor1 node and neighbourhood of central node std::set_intersection(composite_inds_vector.begin(), composite_inds_vector.end(), composite_inds_neighbor1_vector.begin(), composite_inds_neighbor1_vector.end(), std::back_inserter(composite_node_idx_neighbor1_extra)); - for( typename std::vector::iterator iter_node = composite_node_idx_neighbor1_extra.begin(); iter_node!=composite_node_idx_neighbor1_extra.end(); ++iter_node ) - { - checkerAllToRegion unaryPredicateExtra(*iter_node+1); - typename Array< std::tuple >::iterator region_iterator_extra; - region_iterator_extra = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicateExtra); - - //Invalidation of node indices for nodes belonging to the current region - if( region_iterator_extra!=regionToAll.end() ) + for (typename std::vector::iterator iter_node = composite_node_idx_neighbor1_extra.begin(); iter_node != composite_node_idx_neighbor1_extra.end(); ++iter_node) { + checkerAllToRegion unaryPredicateExtra(*iter_node + 1); + typename Array >::iterator region_iterator_extra; + region_iterator_extra = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicateExtra); + + // Invalidation of node indices for nodes belonging to the current region + if (region_iterator_extra != regionToAll.end()) *iter_node = -1; } - //Removal of invalidated indices associated with nodes belonging to current region: (external nodes do not belong to this region) - composite_node_idx_neighbor1_extra.erase(std::remove(composite_node_idx_neighbor1_extra.begin(), composite_node_idx_neighbor1_extra.end(), 
-1),composite_node_idx_neighbor1_extra.end()); + // Removal of invalidated indices associated with nodes belonging to current region: (external nodes do not belong to this region) + composite_node_idx_neighbor1_extra.erase(std::remove(composite_node_idx_neighbor1_extra.begin(), composite_node_idx_neighbor1_extra.end(), -1), composite_node_idx_neighbor1_extra.end()); - //External node from neighbor1 side does not belong to the neighborhood of neighbor2 + // External node from neighbor1 side does not belong to the neighborhood of neighbor2 std::set_difference(composite_node_idx_neighbor1_extra.begin(), composite_node_idx_neighbor1_extra.end(), composite_inds_neighbor2_vector.begin(), composite_inds_neighbor2_vector.end(), std::inserter(diff_neighbor1, diff_neighbor1.begin())); - TEUCHOS_TEST_FOR_EXCEPTION( diff_neighbor1.size()!=1 , Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "<getRank() << " - Region: " << region_idx << " - " + << "Mislabeling of nodes obstructed the identification of the extra node: region node " << region_node_idx << " leads to diff_neighbor1.size()= " << diff_neighbor1.size() << " \n"); } - //Identification of external node from the side of neighbor2 + // Identification of external node from the side of neighbor2 { - //Compute the intersection between neighbourhood of neighbor2 node and neighbourhood of central node + // Compute the intersection between neighbourhood of neighbor2 node and neighbourhood of central node std::set_intersection(composite_inds_vector.begin(), composite_inds_vector.end(), composite_inds_neighbor2_vector.begin(), composite_inds_neighbor2_vector.end(), std::back_inserter(composite_node_idx_neighbor2_extra)); - for( typename std::vector::iterator iter_node = composite_node_idx_neighbor2_extra.begin(); iter_node!=composite_node_idx_neighbor2_extra.end(); ++iter_node ) - { - checkerAllToRegion unaryPredicateExtra(*iter_node+1); - typename Array< std::tuple >::iterator region_iterator_extra; - 
region_iterator_extra = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicateExtra); + for (typename std::vector::iterator iter_node = composite_node_idx_neighbor2_extra.begin(); iter_node != composite_node_idx_neighbor2_extra.end(); ++iter_node) { + checkerAllToRegion unaryPredicateExtra(*iter_node + 1); + typename Array >::iterator region_iterator_extra; + region_iterator_extra = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicateExtra); - //Invalidation of node indices for nodes belonging to the current region - if( region_iterator_extra!=regionToAll.end() ) + // Invalidation of node indices for nodes belonging to the current region + if (region_iterator_extra != regionToAll.end()) *iter_node = -1; } - //Removal of invalidated indices associated with nodes belonging to current region: (external nodes do not belong to this region) - composite_node_idx_neighbor2_extra.erase(std::remove(composite_node_idx_neighbor2_extra.begin(), composite_node_idx_neighbor2_extra.end(), -1),composite_node_idx_neighbor2_extra.end()); + // Removal of invalidated indices associated with nodes belonging to current region: (external nodes do not belong to this region) + composite_node_idx_neighbor2_extra.erase(std::remove(composite_node_idx_neighbor2_extra.begin(), composite_node_idx_neighbor2_extra.end(), -1), composite_node_idx_neighbor2_extra.end()); - //External node from neighbor2 side does not belong to the neighborhood of neighbor1 + // External node from neighbor2 side does not belong to the neighborhood of neighbor1 std::set_difference(composite_node_idx_neighbor2_extra.begin(), composite_node_idx_neighbor2_extra.end(), composite_inds_neighbor1_vector.begin(), composite_inds_neighbor1_vector.end(), std::inserter(diff_neighbor2, diff_neighbor2.begin())); - TEUCHOS_TEST_FOR_EXCEPTION( diff_neighbor2.size()!=1 , Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: 
"<getRank() << " - Region: " << region_idx << " - " + << "Mislabeling of nodes obstructed the identification of the extra node: region node " << region_node_idx << " leads to diff_neighbor2.size()= " << diff_neighbor2.size() << " \n"); } - //Identification of external node from the side of central node + // Identification of external node from the side of central node { - for( typename std::vector::iterator iter_node = composite_node_idx_extra.begin(); iter_node!=composite_node_idx_extra.end(); ++iter_node ) - { - checkerAllToRegion unaryPredicateExtra(*iter_node+1); - typename Array< std::tuple >::iterator region_iterator_extra; - region_iterator_extra = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicateExtra); - - //Invalidation of node indices for nodes belonging to the current region - if( region_iterator_extra!=regionToAll.end() ) + for (typename std::vector::iterator iter_node = composite_node_idx_extra.begin(); iter_node != composite_node_idx_extra.end(); ++iter_node) { + checkerAllToRegion unaryPredicateExtra(*iter_node + 1); + typename Array >::iterator region_iterator_extra; + region_iterator_extra = std::find_if >::iterator, checkerAllToRegion >(regionToAll.begin(), regionToAll.end(), unaryPredicateExtra); + + // Invalidation of node indices for nodes belonging to the current region + if (region_iterator_extra != regionToAll.end()) *iter_node = -1; } - //Removal of invalidated indices associated with nodes belonging to current region: (external nodes do not belong to this region) - composite_node_idx_extra.erase(std::remove(composite_node_idx_extra.begin(), composite_node_idx_extra.end(), -1),composite_node_idx_extra.end()); + // Removal of invalidated indices associated with nodes belonging to current region: (external nodes do not belong to this region) + composite_node_idx_extra.erase(std::remove(composite_node_idx_extra.begin(), composite_node_idx_extra.end(), -1), composite_node_idx_extra.end()); 
std::vector diff_center_temp; - //At thie point composite_node_idx_extra contains indices of all the three external nodes: two of these must be removed since they are already tracked - //External nodes from neighbors1's and neighbor2's side must be removed + // At thie point composite_node_idx_extra contains indices of all the three external nodes: two of these must be removed since they are already tracked + // External nodes from neighbors1's and neighbor2's side must be removed std::set_difference(composite_node_idx_extra.begin(), composite_node_idx_extra.end(), diff_neighbor1.begin(), diff_neighbor1.end(), std::inserter(diff_center_temp, diff_center_temp.begin())); std::set_difference(diff_center_temp.begin(), diff_center_temp.end(), diff_neighbor2.begin(), diff_neighbor2.end(), std::inserter(diff_center, diff_center.begin())); - TEUCHOS_TEST_FOR_EXCEPTION( diff_center.size()!=1 , Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "<getRank() << " - Region: " << region_idx << " - " + << "Mislabeling of nodes obstructed the identification of the extra node: region node " << region_node_idx << " leads to diff_center.size()= " << diff_center.size() << " \n"); } - //Computation of local indices for central node and its neighbors - LocalOrdinal local_region_node_idx = region_matrix->getRowMap()->getLocalElement( region_node_idx ); - LocalOrdinal local_region_node_idx_neighbor1 = region_matrix->getRowMap()->getLocalElement( region_node_idx_neighbor1 ); - LocalOrdinal local_region_node_idx_neighbor2 = region_matrix->getRowMap()->getLocalElement( region_node_idx_neighbor2 ); + // Computation of local indices for central node and its neighbors + LocalOrdinal local_region_node_idx = region_matrix->getRowMap()->getLocalElement(region_node_idx); + LocalOrdinal local_region_node_idx_neighbor1 = region_matrix->getRowMap()->getLocalElement(region_node_idx_neighbor1); + LocalOrdinal local_region_node_idx_neighbor2 = 
region_matrix->getRowMap()->getLocalElement(region_node_idx_neighbor2); - //Computation of local indices for external nodes - LocalOrdinal local_extra_central = enlargedMatrix.getRowMap()->getLocalElement( diff_center[0] ); - LocalOrdinal local_extra_neighbor1 = enlargedMatrix.getRowMap()->getLocalElement( diff_neighbor1[0] ); - LocalOrdinal local_extra_neighbor2 = enlargedMatrix.getRowMap()->getLocalElement( diff_neighbor2[0] ); + // Computation of local indices for external nodes + LocalOrdinal local_extra_central = enlargedMatrix.getRowMap()->getLocalElement(diff_center[0]); + LocalOrdinal local_extra_neighbor1 = enlargedMatrix.getRowMap()->getLocalElement(diff_neighbor1[0]); + LocalOrdinal local_extra_neighbor2 = enlargedMatrix.getRowMap()->getLocalElement(diff_neighbor2[0]); ArrayView region_row; ArrayView region_col; ArrayView region_val; - //Extract Row view of the region matrix before collapsing - if( region_matrix -> isLocallyIndexed() ) - region_matrix -> getLocalRowView( local_region_node_idx, region_col, region_val ); + // Extract Row view of the region matrix before collapsing + if (region_matrix->isLocallyIndexed()) + region_matrix->getLocalRowView(local_region_node_idx, region_col, region_val); else - region_matrix -> getGlobalRowView( region_node_idx, region_col, region_val ); + region_matrix->getGlobalRowView(region_node_idx, region_col, region_val); - //Extract Row of overlapped composite matrix to detect node with information to collapse + // Extract Row of overlapped composite matrix to detect node with information to collapse ArrayView external_row; ArrayView external_col; ArrayView external_val; - enlargedMatrix.getLocalRowView( node_idx, external_col, external_val ); + enlargedMatrix.getLocalRowView(node_idx, external_col, external_val); - //neighbor1 collapse + // neighbor1 collapse { Scalar initial_value = 0; - for( typename ArrayView::iterator iter_view = region_col.begin(); iter_view!=region_col.end(); ++iter_view ) - { - if( 
region_matrix -> isLocallyIndexed() ) - if( *iter_view==local_region_node_idx_neighbor1 ) + for (typename ArrayView::iterator iter_view = region_col.begin(); iter_view != region_col.end(); ++iter_view) { + if (region_matrix->isLocallyIndexed()) + if (*iter_view == local_region_node_idx_neighbor1) initial_value = region_val[iter_view - region_col.begin()]; - if( region_matrix -> isGloballyIndexed() ) - if( *iter_view==region_node_idx_neighbor1 ) + if (region_matrix->isGloballyIndexed()) + if (*iter_view == region_node_idx_neighbor1) initial_value = region_val[iter_view - region_col.begin()]; - if(initial_value!=0) + if (initial_value != 0) break; } Scalar external_value = 0; - for( typename ArrayView::iterator iter_view = external_col.begin(); iter_view!=external_col.end(); ++iter_view ) - { - if( *iter_view==local_extra_neighbor1 ) + for (typename ArrayView::iterator iter_view = external_col.begin(); iter_view != external_col.end(); ++iter_view) { + if (*iter_view == local_extra_neighbor1) external_value = external_val[iter_view - external_col.begin()]; - if(external_value!=0) + if (external_value != 0) break; } - Scalar new_value = external_value;// new matrix entry generated with the collapsing + Scalar new_value = external_value; // new matrix entry generated with the collapsing std::vector new_entry_ind; std::vector new_entry_val; - new_entry_ind.push_back(region_node_idx_neighbor1-1); + new_entry_ind.push_back(region_node_idx_neighbor1 - 1); new_entry_val.push_back(new_value); - //If a nonzero value is already stored in the specified position, the new values is SUMMED to the already existing one - //See description of insertGlobalValues(...) - region_matrix -> insertGlobalValues( region_node_idx-1, new_entry_ind, new_entry_val ); + // If a nonzero value is already stored in the specified position, the new values is SUMMED to the already existing one + // See description of insertGlobalValues(...) 
+ region_matrix->insertGlobalValues(region_node_idx - 1, new_entry_ind, new_entry_val); } - //neighbor2 collapse + // neighbor2 collapse { Scalar initial_value = 0; - for( typename ArrayView::iterator iter_view = region_col.begin(); iter_view!=region_col.end(); ++iter_view ) - { - if( region_matrix -> isLocallyIndexed() ) - if( *iter_view==local_region_node_idx_neighbor2 ) + for (typename ArrayView::iterator iter_view = region_col.begin(); iter_view != region_col.end(); ++iter_view) { + if (region_matrix->isLocallyIndexed()) + if (*iter_view == local_region_node_idx_neighbor2) initial_value = region_val[iter_view - region_col.begin()]; - if( region_matrix -> isGloballyIndexed() ) - if( *iter_view==region_node_idx_neighbor2 ) + if (region_matrix->isGloballyIndexed()) + if (*iter_view == region_node_idx_neighbor2) initial_value = region_val[iter_view - region_col.begin()]; - if(initial_value!=0) + if (initial_value != 0) break; } Scalar external_value = 0; - for( typename ArrayView::iterator iter_view = external_col.begin(); iter_view!=external_col.end(); ++iter_view ) - { - if( *iter_view==local_extra_neighbor2 ) + for (typename ArrayView::iterator iter_view = external_col.begin(); iter_view != external_col.end(); ++iter_view) { + if (*iter_view == local_extra_neighbor2) external_value = external_val[iter_view - external_col.begin()]; - if(external_value!=0) + if (external_value != 0) break; } - Scalar new_value = external_value;// new matrix entry generated with the collapsing + Scalar new_value = external_value; // new matrix entry generated with the collapsing std::vector new_entry_ind; std::vector new_entry_val; - new_entry_ind.push_back(region_node_idx_neighbor2-1); + new_entry_ind.push_back(region_node_idx_neighbor2 - 1); new_entry_val.push_back(new_value); - //If a nonzero value is already stored in the specified position, the new values is SUMMED to the already existing one - //See description of insertGlobalValues(...) 
- region_matrix -> insertGlobalValues( region_node_idx-1, new_entry_ind, new_entry_val ); + // If a nonzero value is already stored in the specified position, the new values is SUMMED to the already existing one + // See description of insertGlobalValues(...) + region_matrix->insertGlobalValues(region_node_idx - 1, new_entry_ind, new_entry_val); } - //central node collapse + // central node collapse { Scalar initial_value = 0; - for( typename ArrayView::iterator iter_view = region_col.begin(); iter_view!=region_col.end(); ++iter_view ) - { - if( region_matrix -> isLocallyIndexed() ) - if( *iter_view==local_region_node_idx ) + for (typename ArrayView::iterator iter_view = region_col.begin(); iter_view != region_col.end(); ++iter_view) { + if (region_matrix->isLocallyIndexed()) + if (*iter_view == local_region_node_idx) initial_value = region_val[iter_view - region_col.begin()]; - if( region_matrix -> isGloballyIndexed() ) - if( *iter_view==region_node_idx ) + if (region_matrix->isGloballyIndexed()) + if (*iter_view == region_node_idx) initial_value = region_val[iter_view - region_col.begin()]; - if(initial_value!=0) + if (initial_value != 0) break; } Scalar external_value = 0; - for( typename ArrayView::iterator iter_view = external_col.begin(); iter_view!=external_col.end(); ++iter_view ) - { - if( *iter_view==local_extra_central ) + for (typename ArrayView::iterator iter_view = external_col.begin(); iter_view != external_col.end(); ++iter_view) { + if (*iter_view == local_extra_central) external_value = external_val[iter_view - external_col.begin()]; - if(external_value!=0) + if (external_value != 0) break; } - Scalar new_value = external_value;// new matrix entry generated with the collapsing + Scalar new_value = external_value; // new matrix entry generated with the collapsing std::vector new_entry_ind; std::vector new_entry_val; - new_entry_ind.push_back(region_node_idx-1); + new_entry_ind.push_back(region_node_idx - 1); new_entry_val.push_back(new_value); - 
//If a nonzero value is already stored in the specified position, the new values is SUMMED to the already existing one - //See description of insertGlobalValues(...) - region_matrix -> insertGlobalValues( region_node_idx-1, new_entry_ind, new_entry_val ); + // If a nonzero value is already stored in the specified position, the new values is SUMMED to the already existing one + // See description of insertGlobalValues(...) + region_matrix->insertGlobalValues(region_node_idx - 1, new_entry_ind, new_entry_val); } } } - } } //@} - //! @name Creation of Region Splitting //@{ - void RegionSplitting(GlobalOrdinal region_idx, RCP& region_matrix, Ifpack2::OverlappingRowMatrix& enlargedMatrix) - { - TEUCHOS_TEST_FOR_EXCEPTION( !region_matrix_initialized_[region_idx], Exceptions::RuntimeError, "The composite stiffness matrix must be chopped into surrogate region matrices before collapsing \n"); - TEUCHOS_TEST_FOR_EXCEPTION( regionHandler_->GetNumRegionNodes(region_idx)!=regionMatrixData_[region_idx]->getGlobalNumRows(), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Number of region nodes in region "<getGlobalNumRows() \n"); + void RegionSplitting(GlobalOrdinal region_idx, RCP ®ion_matrix, Ifpack2::OverlappingRowMatrix &enlargedMatrix) { + TEUCHOS_TEST_FOR_EXCEPTION(!region_matrix_initialized_[region_idx], Exceptions::RuntimeError, "The composite stiffness matrix must be chopped into surrogate region matrices before collapsing \n"); + TEUCHOS_TEST_FOR_EXCEPTION(regionHandler_->GetNumRegionNodes(region_idx) != regionMatrixData_[region_idx]->getGlobalNumRows(), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Number of region nodes in region " << region_idx + 1 << " does not coincide with the value returned by regionMatrixData_[" << region_idx + 1 << "]->getGlobalNumRows() \n"); - Array< std::tuple > regionToAll = regionHandler_->GetRegionToAll(region_idx); + Array > regionToAll = regionHandler_->GetRegionToAll(region_idx); - //This portion of the 
code assumes that the number of region nodes is the same on each direction of the domain - //Foir a 2D problem we have then nx = ny = sqrt( num_region_ndoes_ ) + // This portion of the code assumes that the number of region nodes is the same on each direction of the domain + // Foir a 2D problem we have then nx = ny = sqrt( num_region_ndoes_ ) GlobalOrdinal n; GlobalOrdinal nx; GlobalOrdinal ny; - n = regionHandler_->GetNumRegionNodes(region_idx); + n = regionHandler_->GetNumRegionNodes(region_idx); nx = std::sqrt(n); ny = nx; - TEUCHOS_TEST_FOR_EXCEPTION( static_cast( nx - std::floor(static_cast( std::sqrt(static_cast(n)) )))!=0.0 , Exceptions::RuntimeError, "The code assumes that the regions are 2D and that the number of region nodes is the same on each direction of the domain \n"); + TEUCHOS_TEST_FOR_EXCEPTION(static_cast(nx - std::floor(static_cast(std::sqrt(static_cast(n))))) != 0.0, Exceptions::RuntimeError, "The code assumes that the regions are 2D and that the number of region nodes is the same on each direction of the domain \n"); - //interfaceNodes contains nodes on an interface between any regions + // interfaceNodes contains nodes on an interface between any regions Array > > interfaceNodes = regionHandler_->GetInterfaceNodes(); - ArrayView MyRegionElements =region_matrix->getRowMap()->getLocalElementList(); - for( typename ArrayView::iterator iter = MyRegionElements.begin(); iter!=MyRegionElements.end(); ++iter ) - { - - //Nodes are saved in data structures with 1 as base index - GlobalOrdinal region_node_idx = *iter+1; + ArrayView MyRegionElements = region_matrix->getRowMap()->getLocalElementList(); + for (typename ArrayView::iterator iter = MyRegionElements.begin(); iter != MyRegionElements.end(); ++iter) { + // Nodes are saved in data structures with 1 as base index + GlobalOrdinal region_node_idx = *iter + 1; checkerRegionToAll unaryPredicate(region_node_idx); - typename Array< std::tuple >::iterator composite_iterator; - composite_iterator = 
std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); - TEUCHOS_TEST_FOR_EXCEPTION( composite_iterator==regionToAll.end(), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "< >::iterator composite_iterator; + composite_iterator = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicate); + TEUCHOS_TEST_FOR_EXCEPTION(composite_iterator == regionToAll.end(), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Region: " << region_idx << " - " + << " node with region index: " << region_node_idx << " is not in regionToAll[" << region_idx << "]" + << "\n"); - GlobalOrdinal composite_node_idx = std::get<1>( *composite_iterator ); - checkerInterfaceNodes unaryPredicate2( composite_node_idx ); - typename Array< std::tuple > >::iterator interface_iterator; - interface_iterator = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2); + GlobalOrdinal composite_node_idx = std::get<1>(*composite_iterator); + checkerInterfaceNodes unaryPredicate2(composite_node_idx); + typename Array > >::iterator interface_iterator; + interface_iterator = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2); GlobalOrdinal region_node_idx_neighbor_w = 0; GlobalOrdinal region_node_idx_neighbor_e = 0; @@ -1161,287 +1105,252 @@ class MatrixSplitting : public Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node int count_neighbours = 0; - if( interface_iterator!=interfaceNodes.end() && region_node_idx>ny ) - { - region_node_idx_neighbor_w = region_node_idx-ny; + if (interface_iterator != interfaceNodes.end() && region_node_idx > ny) { + region_node_idx_neighbor_w = region_node_idx - ny; count_neighbours++; } - if( interface_iterator!=interfaceNodes.end() && region_node_idx<=(nx-1)*ny ) - { - region_node_idx_neighbor_e = region_node_idx+ny; + if 
(interface_iterator != interfaceNodes.end() && region_node_idx <= (nx - 1) * ny) { + region_node_idx_neighbor_e = region_node_idx + ny; count_neighbours++; } - if( interface_iterator!=interfaceNodes.end() && region_node_idx%ny!=1 ) - { - region_node_idx_neighbor_s = region_node_idx-1; + if (interface_iterator != interfaceNodes.end() && region_node_idx % ny != 1) { + region_node_idx_neighbor_s = region_node_idx - 1; count_neighbours++; } - if( interface_iterator!=interfaceNodes.end() && region_node_idx%ny!=0 ) - { - region_node_idx_neighbor_n= region_node_idx+1; + if (interface_iterator != interfaceNodes.end() && region_node_idx % ny != 0) { + region_node_idx_neighbor_n = region_node_idx + 1; count_neighbours++; } - bool interface_line = false; + bool interface_line = false; bool interface_corner = false; - if( 3==count_neighbours ) + if (3 == count_neighbours) interface_line = true; - else if( 2==count_neighbours ) + else if (2 == count_neighbours) interface_corner = true; - typename Array< std::tuple >::iterator composite_iterator_neighbor_e; - typename Array< std::tuple > >::iterator interface_iterator_neighbor_e; - typename Array< std::tuple >::iterator composite_iterator_neighbor_w; - typename Array< std::tuple > >::iterator interface_iterator_neighbor_w; - typename Array< std::tuple >::iterator composite_iterator_neighbor_s; - typename Array< std::tuple > >::iterator interface_iterator_neighbor_s; - typename Array< std::tuple >::iterator composite_iterator_neighbor_n; - typename Array< std::tuple > >::iterator interface_iterator_neighbor_n; - - if( interface_line || interface_corner ) - { - //Computation of composite index for East node - if( region_node_idx_neighbor_e!=0 ) - { + typename Array >::iterator composite_iterator_neighbor_e; + typename Array > >::iterator interface_iterator_neighbor_e; + typename Array >::iterator composite_iterator_neighbor_w; + typename Array > >::iterator interface_iterator_neighbor_w; + typename Array >::iterator 
composite_iterator_neighbor_s; + typename Array > >::iterator interface_iterator_neighbor_s; + typename Array >::iterator composite_iterator_neighbor_n; + typename Array > >::iterator interface_iterator_neighbor_n; + + if (interface_line || interface_corner) { + // Computation of composite index for East node + if (region_node_idx_neighbor_e != 0) { checkerRegionToAll unaryPredicateEast(region_node_idx_neighbor_e); - composite_iterator_neighbor_e = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateEast); + composite_iterator_neighbor_e = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateEast); - //Check to see if neighbor_e node lies on a coarse line - GlobalOrdinal composite_node_idx_neighbor_e = std::get<1>( *composite_iterator_neighbor_e ); - checkerInterfaceNodes unaryPredicate2neighborEast( composite_node_idx_neighbor_e ); - interface_iterator_neighbor_e = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborEast); - } - else + // Check to see if neighbor_e node lies on a coarse line + GlobalOrdinal composite_node_idx_neighbor_e = std::get<1>(*composite_iterator_neighbor_e); + checkerInterfaceNodes unaryPredicate2neighborEast(composite_node_idx_neighbor_e); + interface_iterator_neighbor_e = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborEast); + } else interface_iterator_neighbor_e = interfaceNodes.end(); - //Computation of composite index for West node - if( region_node_idx_neighbor_w!=0 ) - { + // Computation of composite index for West node + if (region_node_idx_neighbor_w != 0) { checkerRegionToAll unaryPredicateWest(region_node_idx_neighbor_w); - composite_iterator_neighbor_w = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateWest); + composite_iterator_neighbor_w 
= std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateWest); - //Check to see if neighbor_w node lies on a coarse line - GlobalOrdinal composite_node_idx_neighbor_w = std::get<1>( *composite_iterator_neighbor_w ); - checkerInterfaceNodes unaryPredicate2neighborWest( composite_node_idx_neighbor_w ); - interface_iterator_neighbor_w = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborWest); - } - else + // Check to see if neighbor_w node lies on a coarse line + GlobalOrdinal composite_node_idx_neighbor_w = std::get<1>(*composite_iterator_neighbor_w); + checkerInterfaceNodes unaryPredicate2neighborWest(composite_node_idx_neighbor_w); + interface_iterator_neighbor_w = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborWest); + } else interface_iterator_neighbor_w = interfaceNodes.end(); - //Computation of composite index for South node - if( region_node_idx_neighbor_s!=0 ) - { + // Computation of composite index for South node + if (region_node_idx_neighbor_s != 0) { checkerRegionToAll unaryPredicateSouth(region_node_idx_neighbor_s); - composite_iterator_neighbor_s = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateSouth); + composite_iterator_neighbor_s = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateSouth); - //Check to see if neighbor_s node lies on a coarse line - GlobalOrdinal composite_node_idx_neighbor_s = std::get<1>( *composite_iterator_neighbor_s ); - checkerInterfaceNodes unaryPredicate2neighborSouth( composite_node_idx_neighbor_s ); - interface_iterator_neighbor_s = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborSouth); - } - else + // Check to see if neighbor_s node lies on a coarse line + 
GlobalOrdinal composite_node_idx_neighbor_s = std::get<1>(*composite_iterator_neighbor_s); + checkerInterfaceNodes unaryPredicate2neighborSouth(composite_node_idx_neighbor_s); + interface_iterator_neighbor_s = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborSouth); + } else interface_iterator_neighbor_s = interfaceNodes.end(); - //Computation of composite index for North node - if( region_node_idx_neighbor_n!=0 ) - { + // Computation of composite index for North node + if (region_node_idx_neighbor_n != 0) { checkerRegionToAll unaryPredicateNorth(region_node_idx_neighbor_n); - composite_iterator_neighbor_n = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateNorth); + composite_iterator_neighbor_n = std::find_if >::iterator, checkerRegionToAll >(regionToAll.begin(), regionToAll.end(), unaryPredicateNorth); - //Check to see if neighbor_n node lies on a coarse line - GlobalOrdinal composite_node_idx_neighbor_n = std::get<1>( *composite_iterator_neighbor_n ); - checkerInterfaceNodes unaryPredicate2neighborNorth( composite_node_idx_neighbor_n ); - interface_iterator_neighbor_n = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborNorth); - } - else + // Check to see if neighbor_n node lies on a coarse line + GlobalOrdinal composite_node_idx_neighbor_n = std::get<1>(*composite_iterator_neighbor_n); + checkerInterfaceNodes unaryPredicate2neighborNorth(composite_node_idx_neighbor_n); + interface_iterator_neighbor_n = std::find_if > >::iterator, checkerInterfaceNodes >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicate2neighborNorth); + } else interface_iterator_neighbor_n = interfaceNodes.end(); int count_neighbours_interface = 0; - if( interface_iterator_neighbor_e!=interfaceNodes.end() ) + if (interface_iterator_neighbor_e != interfaceNodes.end()) 
count_neighbours_interface++; - if( interface_iterator_neighbor_w!=interfaceNodes.end() ) + if (interface_iterator_neighbor_w != interfaceNodes.end()) count_neighbours_interface++; - if( interface_iterator_neighbor_s!=interfaceNodes.end() ) + if (interface_iterator_neighbor_s != interfaceNodes.end()) count_neighbours_interface++; - if( interface_iterator_neighbor_n!=interfaceNodes.end() ) + if (interface_iterator_neighbor_n != interfaceNodes.end()) count_neighbours_interface++; - TEUCHOS_TEST_FOR_EXCEPTION( count_neighbours_interface>count_neighbours, Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "< count_neighbours, Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Region: " << region_idx << " - " + << " node with region index: " << region_node_idx << " has inconsistent information on the number of neighbours: count_neighbours = " << count_neighbours << "but count_neighbours_interface =" << count_neighbours_interface << " \n"); - //First the splitting is applied on extradiagonal entries + // First the splitting is applied on extradiagonal entries - //Computation of local indices for central node and its neighbors - //Node index base start from 1 in the structures used, but Trilinos maps start from 0, so - //indices must be shifted by 1 - LocalOrdinal local_region_node_idx = region_matrix->getRowMap()->getLocalElement( region_node_idx-1 ); - LocalOrdinal local_region_node_idx_neighbor_e = region_matrix->getRowMap()->getLocalElement( region_node_idx_neighbor_e-1 ); - LocalOrdinal local_region_node_idx_neighbor_w = region_matrix->getRowMap()->getLocalElement( region_node_idx_neighbor_w-1 ); - LocalOrdinal local_region_node_idx_neighbor_s = region_matrix->getRowMap()->getLocalElement( region_node_idx_neighbor_s-1 ); - LocalOrdinal local_region_node_idx_neighbor_n = region_matrix->getRowMap()->getLocalElement( region_node_idx_neighbor_n-1 ); + // Computation of local indices for central node and its neighbors + // Node index base 
start from 1 in the structures used, but Trilinos maps start from 0, so + // indices must be shifted by 1 + LocalOrdinal local_region_node_idx = region_matrix->getRowMap()->getLocalElement(region_node_idx - 1); + LocalOrdinal local_region_node_idx_neighbor_e = region_matrix->getRowMap()->getLocalElement(region_node_idx_neighbor_e - 1); + LocalOrdinal local_region_node_idx_neighbor_w = region_matrix->getRowMap()->getLocalElement(region_node_idx_neighbor_w - 1); + LocalOrdinal local_region_node_idx_neighbor_s = region_matrix->getRowMap()->getLocalElement(region_node_idx_neighbor_s - 1); + LocalOrdinal local_region_node_idx_neighbor_n = region_matrix->getRowMap()->getLocalElement(region_node_idx_neighbor_n - 1); ArrayView region_col; ArrayView region_val; - //Extract Row view of the region matrix - if( region_matrix -> isLocallyIndexed() ) - region_matrix -> getLocalRowView( local_region_node_idx, region_col, region_val ); + // Extract Row view of the region matrix + if (region_matrix->isLocallyIndexed()) + region_matrix->getLocalRowView(local_region_node_idx, region_col, region_val); else - region_matrix -> getGlobalRowView( *iter, region_col, region_val ); + region_matrix->getGlobalRowView(*iter, region_col, region_val); std::vector region_col_vector = createVector(region_col); std::vector ind_vector(0); std::vector val_vector(0); - //Extraction of the info about East neighbour to halve the associated entry in the matrix - if( interface_iterator_neighbor_e!=interfaceNodes.end() ) - { + // Extraction of the info about East neighbour to halve the associated entry in the matrix + if (interface_iterator_neighbor_e != interfaceNodes.end()) { typename std::vector::iterator iter_east_vector; GlobalOrdinal east_ind; - if( region_matrix -> isLocallyIndexed() ) - { - iter_east_vector = std::find( region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_e ); - east_ind = region_matrix->getRowMap()->getGlobalElement( *iter_east_vector ); + if 
(region_matrix->isLocallyIndexed()) { + iter_east_vector = std::find(region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_e); + east_ind = region_matrix->getRowMap()->getGlobalElement(*iter_east_vector); + } else { + iter_east_vector = std::find(region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_e - 1); + east_ind = *iter_east_vector; } - else - { - iter_east_vector = std::find( region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_e-1 ); - east_ind = *iter_east_vector; - } - Scalar east_val = - 0.5 * region_val[iter_east_vector-region_col_vector.begin()]; - ind_vector.push_back( east_ind ); - val_vector.push_back( east_val ); + Scalar east_val = -0.5 * region_val[iter_east_vector - region_col_vector.begin()]; + ind_vector.push_back(east_ind); + val_vector.push_back(east_val); } - //Extraction of the info about West neighbour to halve the associated entry in the matrix - if( interface_iterator_neighbor_w!=interfaceNodes.end() ) - { + // Extraction of the info about West neighbour to halve the associated entry in the matrix + if (interface_iterator_neighbor_w != interfaceNodes.end()) { typename std::vector::iterator iter_west_vector; GlobalOrdinal west_ind; - if( region_matrix -> isLocallyIndexed() ) - { - iter_west_vector = std::find( region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_w ); - west_ind = region_matrix->getRowMap()->getGlobalElement( *iter_west_vector ); + if (region_matrix->isLocallyIndexed()) { + iter_west_vector = std::find(region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_w); + west_ind = region_matrix->getRowMap()->getGlobalElement(*iter_west_vector); + } else { + iter_west_vector = std::find(region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_w - 1); + west_ind = *iter_west_vector; } - else - { - iter_west_vector = std::find( region_col_vector.begin(), region_col_vector.end(), 
region_node_idx_neighbor_w-1 ); - west_ind = *iter_west_vector; - } - Scalar west_val = - 0.5 * region_val[iter_west_vector-region_col_vector.begin()]; - ind_vector.push_back( west_ind ); - val_vector.push_back( west_val ); + Scalar west_val = -0.5 * region_val[iter_west_vector - region_col_vector.begin()]; + ind_vector.push_back(west_ind); + val_vector.push_back(west_val); } - //Extraction of the info about South neighbour to halve the associated entry in the matrix - if( interface_iterator_neighbor_s!=interfaceNodes.end() ) - { + // Extraction of the info about South neighbour to halve the associated entry in the matrix + if (interface_iterator_neighbor_s != interfaceNodes.end()) { typename std::vector::iterator iter_south_vector; GlobalOrdinal south_ind; - if( region_matrix -> isLocallyIndexed() ) - { - iter_south_vector = std::find( region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_s ); - south_ind = region_matrix->getRowMap()->getGlobalElement( *iter_south_vector ); + if (region_matrix->isLocallyIndexed()) { + iter_south_vector = std::find(region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_s); + south_ind = region_matrix->getRowMap()->getGlobalElement(*iter_south_vector); + } else { + iter_south_vector = std::find(region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_s - 1); + south_ind = *iter_south_vector; } - else - { - iter_south_vector = std::find( region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_s-1 ); - south_ind = *iter_south_vector; - } - Scalar south_val = - 0.5 * region_val[iter_south_vector-region_col_vector.begin()]; - ind_vector.push_back( south_ind ); - val_vector.push_back( south_val ); + Scalar south_val = -0.5 * region_val[iter_south_vector - region_col_vector.begin()]; + ind_vector.push_back(south_ind); + val_vector.push_back(south_val); } - //Extraction of the info about North neighbour to halve the associated entry in the 
matrix - if( interface_iterator_neighbor_n!=interfaceNodes.end() ) - { + // Extraction of the info about North neighbour to halve the associated entry in the matrix + if (interface_iterator_neighbor_n != interfaceNodes.end()) { typename std::vector::iterator iter_north_vector; GlobalOrdinal north_ind; - if( region_matrix -> isLocallyIndexed() ) - { - iter_north_vector = std::find( region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_n ); - north_ind = region_matrix->getRowMap()->getGlobalElement( *iter_north_vector ); + if (region_matrix->isLocallyIndexed()) { + iter_north_vector = std::find(region_col_vector.begin(), region_col_vector.end(), local_region_node_idx_neighbor_n); + north_ind = region_matrix->getRowMap()->getGlobalElement(*iter_north_vector); + } else { + iter_north_vector = std::find(region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_n - 1); + north_ind = *iter_north_vector; } - else - { - iter_north_vector = std::find( region_col_vector.begin(), region_col_vector.end(), region_node_idx_neighbor_n-1 ); - north_ind = *iter_north_vector; - } - Scalar north_val = - 0.5 * region_val[iter_north_vector-region_col_vector.begin()]; - ind_vector.push_back( north_ind ); - val_vector.push_back( north_val ); + Scalar north_val = -0.5 * region_val[iter_north_vector - region_col_vector.begin()]; + ind_vector.push_back(north_ind); + val_vector.push_back(north_val); } - //Extraction of the info about my Node ID to split the associated entry in the matrix - //The ratio used for the splitting depends on the num ber of regions this current node - //belongs to + // Extraction of the info about my Node ID to split the associated entry in the matrix + // The ratio used for the splitting depends on the num ber of regions this current node + // belongs to typename std::vector::iterator iter_center_vector; GlobalOrdinal center_ind; - if( region_matrix -> isLocallyIndexed() ) - { - iter_center_vector = std::find( 
region_col_vector.begin(), region_col_vector.end(), local_region_node_idx ); - center_ind = region_matrix->getRowMap()->getGlobalElement( *iter_center_vector ); - } - else - { - iter_center_vector = std::find( region_col_vector.begin(), region_col_vector.end(), region_node_idx-1 ); - center_ind = *iter_center_vector; + if (region_matrix->isLocallyIndexed()) { + iter_center_vector = std::find(region_col_vector.begin(), region_col_vector.end(), local_region_node_idx); + center_ind = region_matrix->getRowMap()->getGlobalElement(*iter_center_vector); + } else { + iter_center_vector = std::find(region_col_vector.begin(), region_col_vector.end(), region_node_idx - 1); + center_ind = *iter_center_vector; } - //Count of the nubmer of regions the current node belogns to + // Count of the nubmer of regions the current node belogns to GlobalOrdinal region_belonging = std::get<1>(*interface_iterator).size(); - TEUCHOS_TEST_FOR_EXCEPTION( region_belonging<2, Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Region: "<(*interface_iterator)<<" should lie on an interface between regions but the nubmer of regions it belongs to is only "<getRank() << " - Region: " << region_idx << " - " + << " node with composite index: " << std::get<0>(*interface_iterator) << " should lie on an interface between regions but the nubmer of regions it belongs to is only " << region_belonging << "\n"); Scalar center_val; - //If a node is on a corner between four itnerfaces, then each the entry A(node_idx,node_idx) must be split in four parts - //otherwise the entry must be divided by two, similarly to what done for the neighbours + // If a node is on a corner between four itnerfaces, then each the entry A(node_idx,node_idx) must be split in four parts + // otherwise the entry must be divided by two, similarly to what done for the neighbours - center_val = - (1 - static_cast(1/static_cast(region_belonging))) * region_val[iter_center_vector-region_col_vector.begin()]; - ind_vector.push_back( 
center_ind ); - val_vector.push_back( center_val ); - - region_matrix -> insertGlobalValues( region_node_idx-1, ind_vector, val_vector ); + center_val = -(1 - static_cast(1 / static_cast(region_belonging))) * region_val[iter_center_vector - region_col_vector.begin()]; + ind_vector.push_back(center_ind); + val_vector.push_back(center_val); + region_matrix->insertGlobalValues(region_node_idx - 1, ind_vector, val_vector); } } } //@} - //! @name Creation of Region matrices //@{ - void CreateRegionMatrices( Array > region_maps){ + void CreateRegionMatrices(Array > region_maps) { + TEUCHOS_TEST_FOR_EXCEPTION(num_total_regions_ != region_maps.size(), Exceptions::RuntimeError, "Number of regions does not match with the size of region_maps structure \n"); - TEUCHOS_TEST_FOR_EXCEPTION( num_total_regions_!=region_maps.size(), Exceptions::RuntimeError, "Number of regions does not match with the size of region_maps structure \n"); + regionMatrixData_.clear(); - regionMatrixData_.clear( ); - - for( int i = 0; i > xpetraMap; - xpetraMap = Xpetra::MapFactory::Build(lib, regionHandler_->GetNumRegionNodes(i), region_maps[i], 0, comm_); + for (int i = 0; i < num_total_regions_; ++i) { + // Create Xpetra map for region stiffness matrix + RCP > xpetraMap; + xpetraMap = Xpetra::MapFactory::Build(lib, regionHandler_->GetNumRegionNodes(i), region_maps[i], 0, comm_); int num_elements = xpetraMap->getGlobalNumElements(); RCP crs_matrix; - if( Xpetra::UseEpetra==lib ) - crs_matrix = rcp( new EpetraCrsMatrix(xpetraMap, num_elements) ); - else if( Xpetra::UseTpetra==lib ) - crs_matrix = rcp( new TpetraCrsMatrix( xpetraMap, num_elements ) ); + if (Xpetra::UseEpetra == lib) + crs_matrix = rcp(new EpetraCrsMatrix(xpetraMap, num_elements)); + else if (Xpetra::UseTpetra == lib) + crs_matrix = rcp(new TpetraCrsMatrix(xpetraMap, num_elements)); else - std::cerr<<" The library to build matrices must be either Epetra or Tpetra \n"; + std::cerr << " The library to build matrices must be either Epetra or 
Tpetra \n"; RCP matrixPointer = rcp(new CrsMatrixWrap(crs_matrix)); - regionMatrixData_.push_back( matrixPointer ); + regionMatrixData_.push_back(matrixPointer); } - for( GlobalOrdinal i = 0; i > regionMatrixData_; GlobalOrdinal num_total_elements_ = 0; - GlobalOrdinal num_total_regions_ = 0; + GlobalOrdinal num_total_regions_ = 0; //@} -}; //class MatrixSplitting +}; // class MatrixSplitting -} //namespace Xpetra +} // namespace Xpetra -#endif //XPETRA_MATRIXSPLITTING_HPP +#endif // XPETRA_MATRIXSPLITTING_HPP diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_decl.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_decl.hpp index 555feb9e754e..4b5a526a58ff 100644 --- a/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_decl.hpp +++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_decl.hpp @@ -53,19 +53,18 @@ #ifndef XPETRA_REGIONAMG_DECL_HPP #define XPETRA_REGIONAMG_DECL_HPP -//Xpetra +// Xpetra #include #include "Xpetra_MatrixSplitting.hpp" #include "Xpetra_Level_def.hpp" -//MueLu +// MueLu #include #include #include #include - -namespace Xpetra{ +namespace Xpetra { /*! * class RegionAMG @@ -80,121 +79,116 @@ namespace Xpetra{ * we explains here below the rationale behind the approach adopted in this class. 
* */ -template::scalar_type, - class LocalOrdinal = typename MultiVector::local_ordinal_type, - class GlobalOrdinal = typename MultiVector::global_ordinal_type, - class Node = typename MultiVector::node_type> +template ::scalar_type, + class LocalOrdinal = typename MultiVector::local_ordinal_type, + class GlobalOrdinal = typename MultiVector::global_ordinal_type, + class Node = typename MultiVector::node_type> class RegionAMG : Operator { - typedef Map map_type; typedef MultiVector multivector_type; - typedef Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > matrix_type; - typedef MatrixSplitting tpetra_splitting; - typedef MueLu::Hierarchy Hierarchy; + typedef Matrix matrix_type; + typedef MatrixSplitting tpetra_splitting; + typedef MueLu::Hierarchy Hierarchy; typedef MultiVectorFactory mv_factory_type; - typedef Level level; - -public: + typedef Level level; + public: //! Constructors //@{ - RegionAMG() -{ - std::cout<<"This version of constructor is not implemented yet"< matrixSplitting, - Teuchos::RCP > regionHandler, - RCP > comm, Teuchos::ParameterList muelu, - GlobalOrdinal num_levels, GlobalOrdinal coarsening_factor); + Teuchos::RCP > regionHandler, + RCP > comm, Teuchos::ParameterList muelu, + GlobalOrdinal num_levels, GlobalOrdinal coarsening_factor); //@} - GlobalOrdinal GetNumLevels(){return num_levels_;} + GlobalOrdinal GetNumLevels() { return num_levels_; } //! Methods to extract Map information //@{ //! For now, the domain Map coincides with the Domain Map of the composite matrix at the fine level - virtual RCP getDomainMap()const{return domainMap_;} + virtual RCP getDomainMap() const { return domainMap_; } //! For now, the domain Map coincides with the Range Map of the composite matrix at the fine level - virtual RCP getRangeMap()const{return rangeMap_;} + virtual RCP getRangeMap() const { return rangeMap_; } //@} //! Apply method //@{ - //!N.B.: The implementation still has to be finished + //! 
N.B.: The implementation still has to be finished virtual void - apply (const multivector_type& X, multivector_type& Y, - Teuchos::ETransp mode = Teuchos::NO_TRANS, - Scalar alpha = Teuchos::ScalarTraits::one(), - Scalar beta = Teuchos::ScalarTraits::zero())const; + apply(const multivector_type& X, multivector_type& Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + Scalar alpha = Teuchos::ScalarTraits::one(), + Scalar beta = Teuchos::ScalarTraits::zero()) const; //@} //! hasTransposeApply should not be needed virtual bool hasTransposeApply() const { return false; } -private: - + private: //! Private variables //@{ - //Total number of levels in the hierarchy + // Total number of levels in the hierarchy GlobalOrdinal num_levels_ = -1; - //Total number of regions + // Total number of regions GlobalOrdinal num_regions_ = -1; - //Coarsening factor to transfer quantities across levels + // Coarsening factor to transfer quantities across levels int coarsening_factor_ = -1; RCP > comm_; Teuchos::ParameterList muelu_; - RCP domainMap_; - RCP rangeMap_; + RCP domainMap_; + RCP rangeMap_; - //matrixSplitting associated with the composite matrix at the fine level + // matrixSplitting associated with the composite matrix at the fine level RCP matrixSplitting_; - //Array of MueLu hierarchies (one for each region) + // Array of MueLu hierarchies (one for each region) Array > regionHierarchies_; - //Array of levels in the new hierarchy (each levels contains quantities associaed with every region for that level) + // Array of levels in the new hierarchy (each levels contains quantities associaed with every region for that level) Array > levels_; //@} //@Private Methods // - //This methods construct the Hierarchy using the quantities stored in the MueLu::Hierarchy objects - //There are as many MueLu::Hierarchy objects as the number of geometric regions partitioning the domain + // This methods construct the Hierarchy using the quantities stored in the MueLu::Hierarchy objects + // 
There are as many MueLu::Hierarchy objects as the number of geometric regions partitioning the domain void SetUpHierarchy(); - //This method is called in SetUpHierarchy and it is in chanrge of extracting quantities from MueLu::Hierarchy objects - //and store them inside Xpetra::Level objects + // This method is called in SetUpHierarchy and it is in chanrge of extracting quantities from MueLu::Hierarchy objects + // and store them inside Xpetra::Level objects void DefineLevels(); - //Method to create region input multivectors from the composite input multivector - void computeRegionX(const multivector_type& , Array > )const; + // Method to create region input multivectors from the composite input multivector + void computeRegionX(const multivector_type&, Array >) const; - //Method to create composite output multivector from region output multivectors - void computeCompositeY(Array >, multivector_type& )const; + // Method to create composite output multivector from region output multivectors + void computeCompositeY(Array >, multivector_type&) const; - //This method detects entries in region output mutlivectors associated with mesh nodes that lie on interregion interfaces - //and it scales them. The scaling factor is equal to the number of regions that the given degrees of freedom are shared by - void rescaleInterfaceEntries( Array > )const; + // This method detects entries in region output mutlivectors associated with mesh nodes that lie on interregion interfaces + // and it scales them. 
The scaling factor is equal to the number of regions that the given degrees of freedom are shared by + void rescaleInterfaceEntries(Array >) const; - virtual void regionToAllCoarsen(const level&, level& ); + virtual void regionToAllCoarsen(const level&, level&); //@} - }; -} //namespace Xpetra +} // namespace Xpetra #endif diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_def.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_def.hpp index 022b51e32b76..c10e467ec0c4 100644 --- a/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_def.hpp +++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionAMG_def.hpp @@ -54,41 +54,41 @@ #include "Xpetra_RegionAMG_decl.hpp" -namespace Xpetra{ +namespace Xpetra { -template +template RegionAMG::RegionAMG( Teuchos::RCP matrixSplitting, Teuchos::RCP > regionHandler, RCP > comm, Teuchos::ParameterList muelu, - GlobalOrdinal num_levels, GlobalOrdinal coarsening_factor) : - num_levels_(num_levels), coarsening_factor_(coarsening_factor), comm_(comm), muelu_( - muelu) -{ - + GlobalOrdinal num_levels, GlobalOrdinal coarsening_factor) + : num_levels_(num_levels) + , coarsening_factor_(coarsening_factor) + , comm_(comm) + , muelu_( + muelu) { TEUCHOS_TEST_FOR_EXCEPT(matrixSplitting.is_null()); matrixSplitting_ = matrixSplitting; - //The maps defined here below are used to interface with outer applications (e.g. Belos-type solvers) - //and guarantee that DomainMap and RangeMap partition input multivectors and output multivectors in a proper way. - //the maps stored in doaminMap and rangeMap coincide with the maps of the composite matrix - domainMap_ = matrixSplitting_->getDomainMap(); - rangeMap_ = matrixSplitting_->getRangeMap(); + // The maps defined here below are used to interface with outer applications (e.g. Belos-type solvers) + // and guarantee that DomainMap and RangeMap partition input multivectors and output multivectors in a proper way. 
+ // the maps stored in doaminMap and rangeMap coincide with the maps of the composite matrix + domainMap_ = matrixSplitting_->getDomainMap(); + rangeMap_ = matrixSplitting_->getRangeMap(); num_regions_ = matrixSplitting_->getNumRegions(); SetUpHierarchy(); } -template -void RegionAMG::SetUpHierarchy() -{ - TEUCHOS_TEST_FOR_EXCEPTION( num_levels_<=0, Exceptions::RuntimeError, "Number of levels must be a positive integer number \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( num_regions_<=0 , Exceptions::RuntimeError, "No existing regions \n"); +template +void RegionAMG::SetUpHierarchy() { + TEUCHOS_TEST_FOR_EXCEPTION(num_levels_ <= 0, Exceptions::RuntimeError, "Number of levels must be a positive integer number \n"); + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ <= 0, Exceptions::RuntimeError, "No existing regions \n"); - //Creation of the MueLu list for the region multigrid preconditioner - //It would be ideal to have this parameter list read by an .xml file - //at the final stage of the implementation. - //As for now, we hard code it insice of SetUpHierarchy because we want standard setups for almost everything + // Creation of the MueLu list for the region multigrid preconditioner + // It would be ideal to have this parameter list read by an .xml file + // at the final stage of the implementation. + // As for now, we hard code it insice of SetUpHierarchy because we want standard setups for almost everything RCP list = rcp(new Teuchos::ParameterList()); list->setName("MueLu"); @@ -101,29 +101,27 @@ void RegionAMG::SetUpHierarchy() list->set("coarse: max size", 16); GlobalOrdinal num_regions = matrixSplitting_->getNumRegions(); - regionHierarchies_.resize( num_regions ); - - //Instantiation of MueLu Hierarchies for each region of the domain - for( GlobalOrdinal region_idx = 0; region_idxgetRegionHandler()->GetNumRegionNodes(region_idx); // N.B. The following instructions is based on the assumption that each region is a square. 
If not, then another instruciton // must be used to specify the size of the region mesh along teh x-direction GlobalOrdinal nx = std::sqrt(n); - RCP map = matrixSplitting_->getRegionMatrix(region_idx)->getRowMap(); - size_t NumMyElements = map->getLocalNumElements(); + RCP map = matrixSplitting_->getRegionMatrix(region_idx)->getRowMap(); + size_t NumMyElements = map->getLocalNumElements(); Teuchos::ArrayView MyGlobalElements = map->getLocalElementList(); - RCP coords = mv_factory_type::Build (map, 2); + RCP coords = mv_factory_type::Build(map, 2); - //Coordinates are created to pass them as additional input the MueLu::Hierarchy constructors - //The only thing we care aboput is to create a unique mapping between coordinates and degrees of freedom associated with mesh nodes - //Therefore we base the creation of coordinates upon a fake region mesh, which does not coincide with its real gemetric shape - //The fake region we use for coordinates is a [0,(number_nodes_x-1)]X[0,(number_nodes_y-1)] rectangle + // Coordinates are created to pass them as additional input the MueLu::Hierarchy constructors + // The only thing we care aboput is to create a unique mapping between coordinates and degrees of freedom associated with mesh nodes + // Therefore we base the creation of coordinates upon a fake region mesh, which does not coincide with its real gemetric shape + // The fake region we use for coordinates is a [0,(number_nodes_x-1)]X[0,(number_nodes_y-1)] rectangle Teuchos::ArrayRCP > Coord(2); Coord[0] = coords->getDataNonConst(0); Coord[1] = coords->getDataNonConst(1); @@ -136,26 +134,23 @@ void RegionAMG::SetUpHierarchy() Coord[1][i] = Teuchos::as(iy); } - //We create as many MueLu::Hierarchy objects as the nubmer of region that partition the composite domain - regionHierarchies_[region_idx] = MueLu::CreateXpetraPreconditioner( matrixSplitting_->getRegionMatrix( region_idx), *list, coords ); + // We create as many MueLu::Hierarchy objects as the nubmer of region that 
partition the composite domain + regionHierarchies_[region_idx] = MueLu::CreateXpetraPreconditioner(matrixSplitting_->getRegionMatrix(region_idx), *list, coords); // // The following commented line here below should replace the line right above if the MueLu parameter list is passed through an .xml file - //regionHierarchies_[region_idx] = MueLu::CreateXpetraPreconditioner( matrixSplitting_->getRegionMatrix( region_idx), muelu_, coords ); + // regionHierarchies_[region_idx] = MueLu::CreateXpetraPreconditioner( matrixSplitting_->getRegionMatrix( region_idx), muelu_, coords ); - //Different regions may have meshes with different size, or the number of levels the user wants to create may be too big - //with respect to the number of levels MueLu allows (i.e. the minimum coarse size may be reached for a smaller number of levels) - //In this case, the value of num_levels_ is readjusted to the minimum number of levels instantied across all the regions of the domain - num_levels_ = std::min( num_levels_, regionHierarchies_[region_idx]->GetNumLevels() ); + // Different regions may have meshes with different size, or the number of levels the user wants to create may be too big + // with respect to the number of levels MueLu allows (i.e. 
the minimum coarse size may be reached for a smaller number of levels) + // In this case, the value of num_levels_ is readjusted to the minimum number of levels instantied across all the regions of the domain + num_levels_ = std::min(num_levels_, regionHierarchies_[region_idx]->GetNumLevels()); } DefineLevels(); - } - -template -void RegionAMG::DefineLevels ( ) -{ +template +void RegionAMG::DefineLevels() { levels_.clear(); Array > P; P.clear(); @@ -164,46 +159,41 @@ void RegionAMG::DefineLevels ( ) Array > A; A.clear(); - TEUCHOS_TEST_FOR_EXCEPTION( levels_.size()!=0, Exceptions::RuntimeError, "Levels structure is already initialized \n" ); - for( int i = 0; i new_level = rcp( new level(i,num_regions_) ); - for( int region_idx = 0; region_idxGetLevel(i)->IsAvailable("A") , Exceptions::RuntimeError, "No existing operator at level "<GetLevel(i)->template Get >("A") ); - - //MueLu::Hierarchy objects store prolongator and restriction operators only in levels that are the result of a coarsening (they do not exist at the fine level) - if( i>0 ) - { - TEUCHOS_TEST_FOR_EXCEPTION( !regionHierarchies_[region_idx]->GetLevel(i)->IsAvailable("P") , Exceptions::RuntimeError, "No existing prolongator at level "<GetLevel(i)->IsAvailable("R") , Exceptions::RuntimeError, "No existing restriction at level "<GetLevel(i)->template Get >("P") ); - R.push_back( regionHierarchies_[region_idx]->GetLevel(i)->template Get >("R") ); + RCP new_level = rcp(new level(i, num_regions_)); + for (int region_idx = 0; region_idx < num_regions_; ++region_idx) { + TEUCHOS_TEST_FOR_EXCEPTION(!regionHierarchies_[region_idx]->GetLevel(i)->IsAvailable("A"), Exceptions::RuntimeError, "No existing operator at level " << i << " of region " << region_idx << "\n"); + A.push_back(regionHierarchies_[region_idx]->GetLevel(i)->template Get >("A")); + + // MueLu::Hierarchy objects store prolongator and restriction operators only in levels that are the result of a coarsening (they do not exist at the fine level) + if 
(i > 0) { + TEUCHOS_TEST_FOR_EXCEPTION(!regionHierarchies_[region_idx]->GetLevel(i)->IsAvailable("P"), Exceptions::RuntimeError, "No existing prolongator at level " << i << " of region " << region_idx << "\n"); + TEUCHOS_TEST_FOR_EXCEPTION(!regionHierarchies_[region_idx]->GetLevel(i)->IsAvailable("R"), Exceptions::RuntimeError, "No existing restriction at level " << i << " of region " << region_idx << "\n"); + P.push_back(regionHierarchies_[region_idx]->GetLevel(i)->template Get >("P")); + R.push_back(regionHierarchies_[region_idx]->GetLevel(i)->template Get >("R")); } } - //Following the same policy adopted in MueLu::Hierarchy, prolongator and restriction operators are stored at levels produced with a coarsening - if( i>0 ) - { - new_level->SetP( P ); - new_level->SetR( R ); + // Following the same policy adopted in MueLu::Hierarchy, prolongator and restriction operators are stored at levels produced with a coarsening + if (i > 0) { + new_level->SetP(P); + new_level->SetR(R); } - //The Galerkin operator is define dat every level (regardless of coarsening) - new_level->SetA( A ); + // The Galerkin operator is define dat every level (regardless of coarsening) + new_level->SetA(A); - //The structure regionToAll is passed to the fine level as it is - if( 0==i ) - new_level->SetRegionToAll( matrixSplitting_->getRegionHandler()->GetRegionToAll() ); - else - { - //Coarsening level own a regionToAll structure whcih is the result of an injected coarsened regionToAll from the upper level of the hierarchy + // The structure regionToAll is passed to the fine level as it is + if (0 == i) + new_level->SetRegionToAll(matrixSplitting_->getRegionHandler()->GetRegionToAll()); + else { + // Coarsening level own a regionToAll structure whcih is the result of an injected coarsened regionToAll from the upper level of the hierarchy Array > > coarse_regionToAll; - regionToAllCoarsen( *(levels_[i-1]), *new_level ); + regionToAllCoarsen(*(levels_[i - 1]), *new_level); } 
new_level->checkConsistency(); @@ -214,133 +204,106 @@ void RegionAMG::DefineLevels ( ) // (THIS HAS TO BE CHANGED SO THAT A PORTION OF THE COMPOSITE SMOOTHER IS STORED INSTEAD) new_level->ComputeRegionJacobi(); - levels_.push_back( new_level ); + levels_.push_back(new_level); } } +template +void RegionAMG::regionToAllCoarsen(const level& fine, level& coarse) { + TEUCHOS_TEST_FOR_EXCEPTION(fine.GetNumRegions() != coarse.GetNumRegions(), Exceptions::RuntimeError, "Level " << fine.GetLevelID() << "has " << fine.GetNumRegions() << " regions instantiated whereas level " << coarse.GetLevelID() << "has " << coarse.GetNumRegions() << " regions instantiated \n"); -template -void RegionAMG::regionToAllCoarsen (const level& fine, level& coarse) -{ - TEUCHOS_TEST_FOR_EXCEPTION( fine.GetNumRegions()!=coarse.GetNumRegions(), Exceptions::RuntimeError, "Level "< > > fine_regionToAll = fine.GetRegionToAll(); + Array > > coarse_regionToAll(num_regions_); - Array > > fine_regionToAll = fine.GetRegionToAll(); - Array > > coarse_regionToAll(num_regions_); - - for( GlobalOrdinal region_idx = 0; region_idx -void RegionAMG::computeRegionX (const multivector_type& X, Array > regionX)const -{ - - //Array to store extended region Maps (needed to copy composite entries of the input vector into region partitioning of it) +template +void RegionAMG::computeRegionX(const multivector_type& X, Array > regionX) const { + // Array to store extended region Maps (needed to copy composite entries of the input vector into region partitioning of it) Teuchos::Array > overlapping_composite_array; - //Create Overlapping composite maps - for( int region_idx = 0; region_idx overlapped_composite; LocalOrdinal num_elements = regionX[region_idx]->getMap()->getLocalNumElements(); - for( LocalOrdinal local_region_index = 0; local_region_indexgetMap()->getGlobalElement(local_region_index); - GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index+1); - 
overlapped_composite.push_back( composite_composite_index-1 ); + for (LocalOrdinal local_region_index = 0; local_region_index < num_elements; ++local_region_index) { + GlobalOrdinal composite_region_index = regionX[region_idx]->getMap()->getGlobalElement(local_region_index); + GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index + 1); + overlapped_composite.push_back(composite_composite_index - 1); } - overlapping_composite_array.push_back( overlapped_composite ); + overlapping_composite_array.push_back(overlapped_composite); } - //We split at first the input and output multivectors into region ones + // We split at first the input and output multivectors into region ones Array > composite_overlapping_X; - composite_overlapping_X.resize( num_regions_ ); - for( int region_idx = 0; region_idx aux; - if( overlapping_composite_array[region_idx].size()>0 ) - { + if (overlapping_composite_array[region_idx].size() > 0) { Teuchos::Array aux1 = overlapping_composite_array[region_idx]; Teuchos::Array aux2 = X.getMap()->getLocalElementList(); - std::set_union(aux1.begin(), aux1.end(), aux2.begin(), aux2.end(),std::back_inserter(aux)); - } - else - { + std::set_union(aux1.begin(), aux1.end(), aux2.begin(), aux2.end(), std::back_inserter(aux)); + } else { aux = X.getMap()->getLocalElementList(); } - RCP overlapping_composite_map = MapFactory< LocalOrdinal, GlobalOrdinal, Node >::Build( Xpetra::UseTpetra, X.getGlobalLength(), aux, 0, comm_ ); - composite_overlapping_X[region_idx] = MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(overlapping_composite_map, X.getNumVectors()); - RCP > Import1 = ImportFactory::Build( X.getMap(), overlapping_composite_map ); - TEUCHOS_TEST_FOR_EXCEPTION( X.getMap()->getMinAllGlobalIndex()!=composite_overlapping_X[region_idx]->getMap()->getMinAllGlobalIndex(), Exceptions::RuntimeError, "Minimal index in old an new maps do not coincide \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( 
X.getMap()->getMaxAllGlobalIndex()!=composite_overlapping_X[region_idx]->getMap()->getMaxAllGlobalIndex(), Exceptions::RuntimeError, "Maximal index in old an new maps do not coincide \n" ); - composite_overlapping_X[region_idx]->doImport( X, *Import1, Xpetra::INSERT ); + RCP overlapping_composite_map = MapFactory::Build(Xpetra::UseTpetra, X.getGlobalLength(), aux, 0, comm_); + composite_overlapping_X[region_idx] = MultiVectorFactory::Build(overlapping_composite_map, X.getNumVectors()); + RCP > Import1 = ImportFactory::Build(X.getMap(), overlapping_composite_map); + TEUCHOS_TEST_FOR_EXCEPTION(X.getMap()->getMinAllGlobalIndex() != composite_overlapping_X[region_idx]->getMap()->getMinAllGlobalIndex(), Exceptions::RuntimeError, "Minimal index in old an new maps do not coincide \n"); + TEUCHOS_TEST_FOR_EXCEPTION(X.getMap()->getMaxAllGlobalIndex() != composite_overlapping_X[region_idx]->getMap()->getMaxAllGlobalIndex(), Exceptions::RuntimeError, "Maximal index in old an new maps do not coincide \n"); + composite_overlapping_X[region_idx]->doImport(X, *Import1, Xpetra::INSERT); } - //Copy values from composite input multivector X into region multivectors regionX - for( int i = 0; i composite_column = composite_overlapping_X[region_idx]->getData( i ); - LocalOrdinal num_elements = regionX[region_idx]->getMap()->getLocalNumElements(); - ArrayRCP region_column = regionX[region_idx]->getDataNonConst( i ); - for( LocalOrdinal local_region_index = 0; local_region_indexgetMap()->getGlobalElement(local_region_index); - GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index+1); - LocalOrdinal local_composite_index = composite_overlapping_X[region_idx]->getMap()->getLocalElement( composite_composite_index-1 ); - region_column[local_region_index] = composite_column[local_composite_index]; + // Copy values from composite input multivector X into region multivectors regionX + for (int i = 0; i < X.getNumVectors(); ++i) { + for (int 
region_idx = 0; region_idx < num_regions_; ++region_idx) { + ArrayRCP composite_column = composite_overlapping_X[region_idx]->getData(i); + LocalOrdinal num_elements = regionX[region_idx]->getMap()->getLocalNumElements(); + ArrayRCP region_column = regionX[region_idx]->getDataNonConst(i); + for (LocalOrdinal local_region_index = 0; local_region_index < num_elements; ++local_region_index) { + GlobalOrdinal composite_region_index = regionX[region_idx]->getMap()->getGlobalElement(local_region_index); + GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index + 1); + LocalOrdinal local_composite_index = composite_overlapping_X[region_idx]->getMap()->getLocalElement(composite_composite_index - 1); + region_column[local_region_index] = composite_column[local_composite_index]; } } } - } +template +void RegionAMG::computeCompositeY(Array > regionY, multivector_type& Y) const { + // Array to store extended region Maps (needed to copy composite entries of the input vector into region partitioning of it) + Teuchos::Array overlapping_composite; - -template -void RegionAMG::computeCompositeY (Array > regionY, multivector_type& Y)const -{ - - //Array to store extended region Maps (needed to copy composite entries of the input vector into region partitioning of it) - Teuchos::Array overlapping_composite; - - //Create Overlapping composite maps - for( int region_idx = 0; region_idxgetMap()->getLocalNumElements(); - for( LocalOrdinal local_region_index = 0; local_region_indexgetMap()->getGlobalElement(local_region_index); - GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index+1); - overlapping_composite.push_back( composite_composite_index-1 ); + for (LocalOrdinal local_region_index = 0; local_region_index < num_elements; ++local_region_index) { + GlobalOrdinal composite_region_index = regionY[region_idx]->getMap()->getGlobalElement(local_region_index); + GlobalOrdinal 
composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index + 1); + overlapping_composite.push_back(composite_composite_index - 1); } } @@ -349,136 +312,116 @@ void RegionAMG::computeCompositeY (Ar last = std::unique(overlapping_composite.begin(), overlapping_composite.end()); overlapping_composite.erase(last, overlapping_composite.end()); - //We split at first the input and output multivectors into region ones + // We split at first the input and output multivectors into region ones RCP composite_overlapping_Y; - RCP overlapping_composite_map = MapFactory< LocalOrdinal, GlobalOrdinal, Node >::Build( Xpetra::UseTpetra, Y.getGlobalLength(), overlapping_composite, 0, comm_ ); - composite_overlapping_Y = MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(overlapping_composite_map, Y.getNumVectors()); + RCP overlapping_composite_map = MapFactory::Build(Xpetra::UseTpetra, Y.getGlobalLength(), overlapping_composite, 0, comm_); + composite_overlapping_Y = MultiVectorFactory::Build(overlapping_composite_map, Y.getNumVectors()); - //Safety checks to guarantee the legitimacy of non-unique source composite map and unique target composite map - //The unique target composite map must be a subset of the non-unique source composite map - //Before running the std::includes algorithm it is very important to sort the ElementList in ascending order - Teuchos::Array original_map = Y.getMap()->getLocalElementList(); + // Safety checks to guarantee the legitimacy of non-unique source composite map and unique target composite map + // The unique target composite map must be a subset of the non-unique source composite map + // Before running the std::includes algorithm it is very important to sort the ElementList in ascending order + Teuchos::Array original_map = Y.getMap()->getLocalElementList(); Teuchos::Array overlapping_map = composite_overlapping_Y->getMap()->getLocalElementList(); std::sort(original_map.begin(), 
original_map.end()); std::sort(overlapping_map.begin(), overlapping_map.end()); - TEUCHOS_TEST_FOR_EXCEPTION( !( std::includes(overlapping_map.begin(), overlapping_map.end(), original_map.begin(), original_map.end()) ), Exceptions::RuntimeError, "Overlapping (non-unique) composite map does NOT include original (unique) composite map \n" ); - - //Copy values from output region multivectors regionY into output composite multivector Y - for( int i = 0; i composite_column = composite_overlapping_Y->getDataNonConst( i ); - LocalOrdinal num_elements = regionY[region_idx]->getMap()->getLocalNumElements(); - ArrayRCP region_column = regionY[region_idx]->getData( i ); - for( LocalOrdinal local_region_index = 0; local_region_indexgetMap()->getGlobalElement(local_region_index); - GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index+1); - LocalOrdinal local_composite_index = composite_overlapping_Y->getMap()->getLocalElement( composite_composite_index-1 ); + TEUCHOS_TEST_FOR_EXCEPTION(!(std::includes(overlapping_map.begin(), overlapping_map.end(), original_map.begin(), original_map.end())), Exceptions::RuntimeError, "Overlapping (non-unique) composite map does NOT include original (unique) composite map \n"); + + // Copy values from output region multivectors regionY into output composite multivector Y + for (int i = 0; i < Y.getNumVectors(); ++i) { + for (int region_idx = 0; region_idx < num_regions_; ++region_idx) { + ArrayRCP composite_column = composite_overlapping_Y->getDataNonConst(i); + LocalOrdinal num_elements = regionY[region_idx]->getMap()->getLocalNumElements(); + ArrayRCP region_column = regionY[region_idx]->getData(i); + for (LocalOrdinal local_region_index = 0; local_region_index < num_elements; ++local_region_index) { + GlobalOrdinal composite_region_index = regionY[region_idx]->getMap()->getGlobalElement(local_region_index); + GlobalOrdinal composite_composite_index = 
levels_[0]->GetCompositeIndex(region_idx, composite_region_index + 1); + LocalOrdinal local_composite_index = composite_overlapping_Y->getMap()->getLocalElement(composite_composite_index - 1); composite_column[local_composite_index] += region_column[local_region_index]; } } } - //Import object needed to transfer output composite multivector from non-unique map to unique map - RCP > Import1 = ImportFactory::Build( composite_overlapping_Y->getMap(), Y.getMap() ); - - //IMPORTANT: ALWAYS USE IMPORT-EXPORT OBJECTS IN FORWARD MODE (i.e. doImport methods must use Import objects and doExport methods must use Export objects) - //Reverse mode (i.e. crossed use of doImport/doExport methods with Export/Import objects makes the code possibly crash due to undefined behavior) - Y.doImport( *composite_overlapping_Y, *Import1, Xpetra::ADD ); + // Import object needed to transfer output composite multivector from non-unique map to unique map + RCP > Import1 = ImportFactory::Build(composite_overlapping_Y->getMap(), Y.getMap()); + // IMPORTANT: ALWAYS USE IMPORT-EXPORT OBJECTS IN FORWARD MODE (i.e. doImport methods must use Import objects and doExport methods must use Export objects) + // Reverse mode (i.e. 
crossed use of doImport/doExport methods with Export/Import objects makes the code possibly crash due to undefined behavior) + Y.doImport(*composite_overlapping_Y, *Import1, Xpetra::ADD); } - - -template -void RegionAMG::rescaleInterfaceEntries (Array > regionY)const -{ - - Array > > regionToAll = levels_[0]->GetRegionToAll(); +template +void RegionAMG::rescaleInterfaceEntries(Array > regionY) const { + Array > > regionToAll = levels_[0]->GetRegionToAll(); Array > > interfaceNodes = matrixSplitting_->getRegionHandler()->GetInterfaceNodes(); - TEUCHOS_TEST_FOR_EXCEPTION( num_regions_!=regionToAll.size(), Exceptions::RuntimeError, "Regions stored in Level 0 do not match with total number of regions in RegionAMG class \n" ); + TEUCHOS_TEST_FOR_EXCEPTION(num_regions_ != regionToAll.size(), Exceptions::RuntimeError, "Regions stored in Level 0 do not match with total number of regions in RegionAMG class \n"); - for( int region_idx = 0; region_idxgetMap()->getLocalNumElements(); - for( LocalOrdinal local_region_index = 0; local_region_indexgetMap()->getGlobalElement(local_region_index); - GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index+1); + for (LocalOrdinal local_region_index = 0; local_region_index < num_elements; ++local_region_index) { + GlobalOrdinal composite_region_index = regionY[region_idx]->getMap()->getGlobalElement(local_region_index); + GlobalOrdinal composite_composite_index = levels_[0]->GetCompositeIndex(region_idx, composite_region_index + 1); checkerNodesToRegion unaryPredicateNode(composite_composite_index); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicateNode); - //The rescaling must be applied only to entries associated with mesh on the interface - if( nodes_to_region_iterator!=interfaceNodes.end() ) - { - Array nodal_regions = 
std::get<1>(*nodes_to_region_iterator); - for( int i = 0; igetNumVectors(); ++i) - { - ArrayRCP region_column = regionY[region_idx]->getDataNonConst( i ); - if( nodal_regions.size()>1 ) - region_column[ local_region_index ] = region_column[ local_region_index ]/( nodal_regions.size() ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(interfaceNodes.begin(), interfaceNodes.end(), unaryPredicateNode); + // The rescaling must be applied only to entries associated with mesh on the interface + if (nodes_to_region_iterator != interfaceNodes.end()) { + Array nodal_regions = std::get<1>(*nodes_to_region_iterator); + for (int i = 0; i < regionY[region_idx]->getNumVectors(); ++i) { + ArrayRCP region_column = regionY[region_idx]->getDataNonConst(i); + if (nodal_regions.size() > 1) + region_column[local_region_index] = region_column[local_region_index] / (nodal_regions.size()); } } } } - } +template +void RegionAMG::apply(const multivector_type& X, multivector_type& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const { + // N.B.: currently Scalar quantities alpha and beta are passed as input parameters to have the apply method signature match with the apply signature of an Xpetra::Operator + // however we are not currently using them (for us alpha=0 and beta=1) -template -void RegionAMG::apply (const multivector_type& X, multivector_type& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta)const -{ - - //N.B.: currently Scalar quantities alpha and beta are passed as input parameters to have the apply method signature match with the apply signature of an Xpetra::Operator - //however we are not currently using them (for us alpha=0 and beta=1) - - //At first we check that input and output vector have matching maps with the composite matrix - //The Map of X must coincide with the Domain map of compositeA - //The Map of Y must coincide with the Range map of compositeA - 
TEUCHOS_TEST_FOR_EXCEPTION( !(X.getMap()->isSameAs( *(matrixSplitting_->getMatrix()->getDomainMap()) ) ), Exceptions::RuntimeError, "Map of composite input multivector X does not coincide with Domain Map of composite matrix \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( !(Y.getMap()->isSameAs( *(matrixSplitting_->getMatrix()->getRangeMap()) ) ), Exceptions::RuntimeError, "Map of composite input multivector X does not coincide with Range Map of composite matrix \n" ); - TEUCHOS_TEST_FOR_EXCEPTION( X.getNumVectors()!=Y.getNumVectors(), Exceptions::RuntimeError, "Number of vectors in input numltivector X does NOT match number of vectors in output multivector Y \n" ); + // At first we check that input and output vector have matching maps with the composite matrix + // The Map of X must coincide with the Domain map of compositeA + // The Map of Y must coincide with the Range map of compositeA + TEUCHOS_TEST_FOR_EXCEPTION(!(X.getMap()->isSameAs(*(matrixSplitting_->getMatrix()->getDomainMap()))), Exceptions::RuntimeError, "Map of composite input multivector X does not coincide with Domain Map of composite matrix \n"); + TEUCHOS_TEST_FOR_EXCEPTION(!(Y.getMap()->isSameAs(*(matrixSplitting_->getMatrix()->getRangeMap()))), Exceptions::RuntimeError, "Map of composite input multivector X does not coincide with Range Map of composite matrix \n"); + TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), Exceptions::RuntimeError, "Number of vectors in input numltivector X does NOT match number of vectors in output multivector Y \n"); - //We split at first the input and output multivectors into region ones + // We split at first the input and output multivectors into region ones Array > regionX; Array > regionY; - regionX.resize( num_regions_ ); - regionY.resize( num_regions_ ); - - //Associate Maps to region input (regionX) and output vectors (regionY) - for( int region_idx = 0; region_idx::Build(levels_[0]->GetRegionMatrix(region_idx)->getDomainMap(), X.getNumVectors()); - 
regionY[region_idx] = MultiVectorFactory< Scalar, LocalOrdinal, GlobalOrdinal, Node >::Build(levels_[0]->GetRegionMatrix(region_idx)->getRangeMap(), Y.getNumVectors()); + regionX.resize(num_regions_); + regionY.resize(num_regions_); + + // Associate Maps to region input (regionX) and output vectors (regionY) + for (int region_idx = 0; region_idx < num_regions_; ++region_idx) { + regionX[region_idx] = MultiVectorFactory::Build(levels_[0]->GetRegionMatrix(region_idx)->getDomainMap(), X.getNumVectors()); + regionY[region_idx] = MultiVectorFactory::Build(levels_[0]->GetRegionMatrix(region_idx)->getRangeMap(), Y.getNumVectors()); } - //Split the composite input multivector X into region multivector regionX - computeRegionX( X, regionX ); + // Split the composite input multivector X into region multivector regionX + computeRegionX(X, regionX); - //This is the portion where the V-cycle is executed (for now we only have region V-cycles) - for( int region_idx = 0; region_idxIterate( *regionX[region_idx],*regionY[region_idx] ); + // This is the portion where the V-cycle is executed (for now we only have region V-cycles) + for (int region_idx = 0; region_idx < num_regions_; ++region_idx) { + regionHierarchies_[region_idx]->Iterate(*regionX[region_idx], *regionY[region_idx]); } - //We rescale entries of region multivector regionY that are associated with mesh nodes on an interface - rescaleInterfaceEntries( regionY ); + // We rescale entries of region multivector regionY that are associated with mesh nodes on an interface + rescaleInterfaceEntries(regionY); - //We create const view of the region multivector regionY because we want to guarantee that the composite output multivector Y - //is constructed without modifying any information given from each region + // We create const view of the region multivector regionY because we want to guarantee that the composite output multivector Y + // is constructed without modifying any information given from each region Array > 
regionYconst; - for( int region_idx = 0; region_idx #include -namespace Xpetra{ +namespace Xpetra { -template +template bool compareRegions(const std::tuple &, const std::tuple &); -template +template bool compareNodes(const std::tuple &, const std::tuple &); -//Definition of the predicate for the node_ structure. -//Given a tuple made of node index and a specific region it belongs to, -//this predicate returns true if the node belongs to the region specified in input to the predicate. -template +// Definition of the predicate for the node_ structure. +// Given a tuple made of node index and a specific region it belongs to, +// this predicate returns true if the node belongs to the region specified in input to the predicate. +template class checkerNode { + public: + // Constructor + checkerNode(GlobalOrdinal region_index) { region_index_ = region_index; }; -public: - - //Constructor - checkerNode( GlobalOrdinal region_index){region_index_ = region_index;}; - - //Unary Operator - bool operator()(const std::tuple &node) - { return (std::get<1>(node) == region_index_); } - -private: + // Unary Operator + bool operator()(const std::tuple &node) { return (std::get<1>(node) == region_index_); } + private: GlobalOrdinal region_index_; - }; - -//Definition of the predicate for the nodesToRegion_ sitructure -//Given a tuple made of node index and a vector with labels of regions it belongs to, -//this predicate returns true if the node coincides with the node specified in input to the predicate. -template +// Definition of the predicate for the nodesToRegion_ sitructure +// Given a tuple made of node index and a vector with labels of regions it belongs to, +// this predicate returns true if the node coincides with the node specified in input to the predicate. 
+template class checkerNodesToRegion { + public: + // Constructor + checkerNodesToRegion(GlobalOrdinal node_index) { node_index_ = node_index; }; -public: - - //Constructor - checkerNodesToRegion( GlobalOrdinal node_index){node_index_ = node_index;}; - - //Unary Operator - bool operator()(const std::tuple > &node) - { return (std::get<0>(node) == node_index_); } - -private: + // Unary Operator + bool operator()(const std::tuple > &node) { return (std::get<0>(node) == node_index_); } + private: GlobalOrdinal node_index_; - }; - -//This is an auxiliary class to store row maps for the composite matrix, region matrices and -//a regionToAll map to link region node indices with the composite ones +// This is an auxiliary class to store row maps for the composite matrix, region matrices and +// a regionToAll map to link region node indices with the composite ones template -class Splitting_MapsInfo{ -public: - Array > > regionToAll_;//used as a map for a RegionToAll node index - Array composite_map_; //used as RowMap for composite matrices - Array > region_maps_; //used as RowMap for region matrices +class Splitting_MapsInfo { + public: + Array > > regionToAll_; // used as a map for a RegionToAll node index + Array composite_map_; // used as RowMap for composite matrices + Array > region_maps_; // used as RowMap for region matrices }; - // This is the actual class that defines the regionHandler template -class RegionHandler{ - -public: - +class RegionHandler { + public: //! @name Constructor/Destructor Methods //@{ //! Constructor specifying the file name containing region information. - RegionHandler (const std::string &, RCP< const Teuchos::Comm >); + RegionHandler(const std::string &, RCP >); //} //! 
@Interface methods //@{ - GlobalOrdinal GetNumGlobalElements()const{return num_total_nodes_;}; - GlobalOrdinal GetNumTotalRegions()const{return num_total_regions_;}; - GlobalOrdinal GetNumRegionNodes(GlobalOrdinal region_idx)const{return num_region_nodes_[region_idx];}; - Array GetGlobalRowMap()const{return maps_.composite_map_;}; - Array GetRegionRowMap(GlobalOrdinal region_index)const; - Array > GetRegionRowMaps()const{return maps_.region_maps_;}; - Array > > GetRegionToAll()const;//used as a map for a RegionToAll node index - Array< std::tuple > GetRegionToAll(GlobalOrdinal)const;//used as a map for a RegionToAll node index - Array > > GetInterfaceNodes()const{return interfaceNodes_;}; + GlobalOrdinal GetNumGlobalElements() const { return num_total_nodes_; }; + GlobalOrdinal GetNumTotalRegions() const { return num_total_regions_; }; + GlobalOrdinal GetNumRegionNodes(GlobalOrdinal region_idx) const { return num_region_nodes_[region_idx]; }; + Array GetGlobalRowMap() const { return maps_.composite_map_; }; + Array GetRegionRowMap(GlobalOrdinal region_index) const; + Array > GetRegionRowMaps() const { return maps_.region_maps_; }; + Array > > GetRegionToAll() const; // used as a map for a RegionToAll node index + Array > GetRegionToAll(GlobalOrdinal) const; // used as a map for a RegionToAll node index + Array > > GetInterfaceNodes() const { return interfaceNodes_; }; //} //! @Printout methods void printView() const; @@ -151,29 +136,28 @@ class RegionHandler{ void printInactive() const; //} -private: - + private: //! 
@Private variables //@{ - RCP< const Teuchos::Comm > comm_; + RCP > comm_; bool nodes_sorted_by_regions_ = false; - //Global information - GlobalOrdinal num_total_nodes_ = 0; + // Global information + GlobalOrdinal num_total_nodes_ = 0; GlobalOrdinal num_total_regions_ = 0; - Array > nodes_;//basic structure that imports the information from the input file + Array > nodes_; // basic structure that imports the information from the input file - //the following two Array are used to handle the situation where either the number of processes exceeds the number of regions or viceversa - Array regions_per_proc_;//if num_proc > num_regions, then it says how many regions are owned by a single process, empty otherwise - Array > > procs_per_region_; //lists of processes instantiated for each region + // the following two Array are used to handle the situation where either the number of processes exceeds the number of regions or viceversa + Array regions_per_proc_; // if num_proc > num_regions, then it says how many regions are owned by a single process, empty otherwise + Array > > procs_per_region_; // lists of processes instantiated for each region - Array > > nodesToRegion_; //for each node it lists the regions it belongs to - Array > > interfaceNodes_; //for each node on the interface it lists the regions it belongs to - //vector which contains the number of region nodes for each domain region + Array > > nodesToRegion_; // for each node it lists the regions it belongs to + Array > > interfaceNodes_; // for each node on the interface it lists the regions it belongs to + // vector which contains the number of region nodes for each domain region Array num_region_nodes_; - //Maps used for composite and region operators + // Maps used for composite and region operators Splitting_MapsInfo maps_; //@} @@ -185,32 +169,30 @@ class RegionHandler{ void CreateRowMaps(); //@} -}; //class RegionHandler - -//This compare class is used to run the sorting algorithm on the list of nodes 
with associated regions they belong to. -//First, nodes are sorted in ascending order for region labels. Then, the sorting shuffles the nodes in ascending node index for -//each given region -template -bool compareRegions(const std::tuple &lhs, const std::tuple &rhs) -{ - //First we prioritize the sorting according to the region label - //If the region is the same, then the sorting looks at the composite node index - if( std::get<1>(lhs) < std::get<1>(rhs) ) +}; // class RegionHandler + +// This compare class is used to run the sorting algorithm on the list of nodes with associated regions they belong to. +// First, nodes are sorted in ascending order for region labels. Then, the sorting shuffles the nodes in ascending node index for +// each given region +template +bool compareRegions(const std::tuple &lhs, const std::tuple &rhs) { + // First we prioritize the sorting according to the region label + // If the region is the same, then the sorting looks at the composite node index + if (std::get<1>(lhs) < std::get<1>(rhs)) return true; - else if( std::get<1>(lhs) == std::get<1>(rhs) ) + else if (std::get<1>(lhs) == std::get<1>(rhs)) return std::get<0>(lhs) < std::get<0>(rhs); else return false; } -//This compare is sed to run the sorting algorithm where the nodes are ordered in ascendin order for thei node indes, regardless of the -//associated region index -template -bool compareNodes(const std::tuple &lhs, const std::tuple &rhs) -{ +// This compare is sed to run the sorting algorithm where the nodes are ordered in ascendin order for thei node indes, regardless of the +// associated region index +template +bool compareNodes(const std::tuple &lhs, const std::tuple &rhs) { return std::get<0>(lhs) < std::get<0>(rhs); } -} //namespace Xpetra +} // namespace Xpetra #endif diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionHandler_def.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionHandler_def.hpp index 8e7b264c11c3..516f090f102e 100644 
--- a/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionHandler_def.hpp +++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_RegionHandler_def.hpp @@ -55,60 +55,55 @@ #include "Xpetra_RegionHandler_decl.hpp" #include -namespace Xpetra{ +namespace Xpetra { template -RegionHandler::RegionHandler(const std::string &file_name, RCP< const Teuchos::Comm > comm): comm_(comm) -{ +RegionHandler::RegionHandler(const std::string &file_name, RCP > comm) + : comm_(comm) { ReadFileInfo(file_name); - //Nodes are shuffled so that regions are sorted in ascending labeling order + // Nodes are shuffled so that regions are sorted in ascending labeling order std::sort(nodes_.begin(), nodes_.end(), compareRegions); nodes_sorted_by_regions_ = true; - if(comm_->getRank()==0) - std::cout<<"Started NodesToRegion"<getRank() == 0) + std::cout << "Started NodesToRegion" << std::endl; NodesToRegion(); - if(comm_->getRank()==0) - std::cout<<"Finished NodesToRegion"<getRank() == 0) + std::cout << "Finished NodesToRegion" << std::endl; ComputeProcRegions(); - if(comm_->getRank()==0) - std::cout<<"Started RowMaps"<getRank() == 0) + std::cout << "Started RowMaps" << std::endl; CreateRowMaps(); - if(comm_->getRank()==0) - std::cout<<"Finished RowMaps"<getRank() == 0) + std::cout << "Finished RowMaps" << std::endl; num_region_nodes_.clear(); num_region_nodes_.resize(num_total_regions_); - //For each region, the following loop counts the number of region nodes and stores them - for( GlobalOrdinal region_idx = 1; region_idx<=num_total_regions_; ++region_idx ) - { + // For each region, the following loop counts the number of region nodes and stores them + for (GlobalOrdinal region_idx = 1; region_idx <= num_total_regions_; ++region_idx) { checkerNode unaryPredicate(region_idx); - typename Array< std::tuple >::iterator nodes_iterator1; - typename Array< std::tuple >::iterator nodes_iterator2; - nodes_iterator1 = std::find_if >::iterator, checkerNode >(nodes_.begin(), nodes_.end(), 
unaryPredicate); - nodes_iterator2 = std::find_if_not >::iterator, checkerNode >(nodes_iterator1, nodes_.end(), unaryPredicate); - num_region_nodes_[region_idx-1] = nodes_iterator2 - nodes_iterator1; + typename Array >::iterator nodes_iterator1; + typename Array >::iterator nodes_iterator2; + nodes_iterator1 = std::find_if >::iterator, checkerNode >(nodes_.begin(), nodes_.end(), unaryPredicate); + nodes_iterator2 = std::find_if_not >::iterator, checkerNode >(nodes_iterator1, nodes_.end(), unaryPredicate); + num_region_nodes_[region_idx - 1] = nodes_iterator2 - nodes_iterator1; } - } - template -void RegionHandler::ReadFileInfo(const std::string &file_name) -{ +void RegionHandler::ReadFileInfo(const std::string &file_name) { std::ifstream input_file_(file_name, std::ifstream::in); - std::string line; - TEUCHOS_TEST_FOR_EXCEPTION( !input_file_.good(), Exceptions::RuntimeError, "Cannot read \"" << file_name << "\""); + std::string line; + TEUCHOS_TEST_FOR_EXCEPTION(!input_file_.good(), Exceptions::RuntimeError, "Cannot read \"" << file_name << "\""); GlobalOrdinal line_index = 0; - //The information contained in the file is imported and stored in a Teuchos::Array of tuples. - //The first field of the tuple is the composite node index, the second field of the tuple is the region index - while ( std::getline (input_file_,line) ) - { - std::istringstream is( line ); + // The information contained in the file is imported and stored in a Teuchos::Array of tuples. 
+ // The first field of the tuple is the composite node index, the second field of the tuple is the region index + while (std::getline(input_file_, line)) { + std::istringstream is(line); GlobalOrdinal number; Array node; std::tuple node_region; @@ -117,22 +112,18 @@ void RegionHandler::ReadFileInfo(cons node.clear(); composite_info.clear(); - if( 1==line_index ) - { - while(is>>number) + if (1 == line_index) { + while (is >> number) composite_info.push_back(number); - TEUCHOS_TEST_FOR_EXCEPTION( composite_info.size()!=2, Exceptions::RuntimeError, "The composite information must be a couple of integers: nTotal of nodes + nTotal of regions \n"); - num_total_nodes_ = composite_info[0]; + TEUCHOS_TEST_FOR_EXCEPTION(composite_info.size() != 2, Exceptions::RuntimeError, "The composite information must be a couple of integers: nTotal of nodes + nTotal of regions \n"); + num_total_nodes_ = composite_info[0]; num_total_regions_ = composite_info[1]; - } - else if( line_index>2 ) - { - while(is>>number) - { + } else if (line_index > 2) { + while (is >> number) { node.push_back(number); } - TEUCHOS_TEST_FOR_EXCEPTION( node.size()!=2, Exceptions::RuntimeError, "The node information must be a couple of integers: Node index + Region idnex \n"); + TEUCHOS_TEST_FOR_EXCEPTION(node.size() != 2, Exceptions::RuntimeError, "The node information must be a couple of integers: Node index + Region idnex \n"); node_region = std::make_tuple(node[0], node[1]); nodes_.push_back(node_region); node.clear(); @@ -142,97 +133,86 @@ void RegionHandler::ReadFileInfo(cons input_file_.close(); } -//This routines computes the way regions are partitioned across processes -//The partitioning policies of course depends on whether the number of processes -//exceeds the number of regions or not. -//ASSUMPTION: A PROCESS CANNOT OWN CHUNKS OF MULTIPLE REGIONS. EITHER A PROCESS IS CONFINED INSIDE A SINGLE REGION -//OR IT MUST POSSESS ENTIRE REGIONS. 
-//The distribution of regions (or portions of them) across processes is conductes so to guarantee load balancing +// This routines computes the way regions are partitioned across processes +// The partitioning policies of course depends on whether the number of processes +// exceeds the number of regions or not. +// ASSUMPTION: A PROCESS CANNOT OWN CHUNKS OF MULTIPLE REGIONS. EITHER A PROCESS IS CONFINED INSIDE A SINGLE REGION +// OR IT MUST POSSESS ENTIRE REGIONS. +// The distribution of regions (or portions of them) across processes is conductes so to guarantee load balancing template -void RegionHandler::ComputeProcRegions() -{ +void RegionHandler::ComputeProcRegions() { int tot_num_proc = comm_->getSize(); - int myPID = comm_->getRank(); + int myPID = comm_->getRank(); regions_per_proc_.clear(); procs_per_region_.clear(); - //If the number of processes instantiate is smaller than the total number of regions, - //then each process owns entire regions. The number of regions per process is calculates so to guarantee - //load balancing. After an initial distribution of regions, leftover regions that have not been assigned to any process yet are - //distributed in a round-robin fashion - if( tot_num_proc < num_total_regions_ ) - { - int min_nregions_proc = std::floor( static_cast(num_total_regions_)/static_cast(tot_num_proc) ); + // If the number of processes instantiate is smaller than the total number of regions, + // then each process owns entire regions. The number of regions per process is calculates so to guarantee + // load balancing. 
After an initial distribution of regions, leftover regions that have not been assigned to any process yet are + // distributed in a round-robin fashion + if (tot_num_proc < num_total_regions_) { + int min_nregions_proc = std::floor(static_cast(num_total_regions_) / static_cast(tot_num_proc)); int num_leftover_regions = num_total_regions_ % tot_num_proc; - for( int i=1; i<=min_nregions_proc; ++i ) - regions_per_proc_.push_back( myPID*min_nregions_proc+i ); + for (int i = 1; i <= min_nregions_proc; ++i) + regions_per_proc_.push_back(myPID * min_nregions_proc + i); - if( num_leftover_regions>=myPID+1 && num_leftover_regions!=0 ) - regions_per_proc_.push_back( min_nregions_proc*tot_num_proc + (myPID+1) ); + if (num_leftover_regions >= myPID + 1 && num_leftover_regions != 0) + regions_per_proc_.push_back(min_nregions_proc * tot_num_proc + (myPID + 1)); - for( int procID = 0; procIDgetSize(); ++procID ) - { + for (int procID = 0; procID < comm_->getSize(); ++procID) { Array proc; proc.clear(); proc.push_back(procID); - for( int i = 1; i<=min_nregions_proc; ++i ) - { - GlobalOrdinal region_index = (procID)*min_nregions_proc + i; + for (int i = 1; i <= min_nregions_proc; ++i) { + GlobalOrdinal region_index = (procID)*min_nregions_proc + i; std::tuple > tuple_aux = std::make_tuple(region_index, proc); - procs_per_region_.push_back( tuple_aux ); + procs_per_region_.push_back(tuple_aux); } - if( num_leftover_regions>=procID+1 && num_leftover_regions!=0 ) - { - GlobalOrdinal region_index = min_nregions_proc*tot_num_proc + (procID+1); + if (num_leftover_regions >= procID + 1 && num_leftover_regions != 0) { + GlobalOrdinal region_index = min_nregions_proc * tot_num_proc + (procID + 1); std::tuple > tuple_aux = std::make_tuple(region_index, proc); - procs_per_region_.push_back( tuple_aux ); + procs_per_region_.push_back(tuple_aux); } } - TEUCHOS_TEST_FOR_EXCEPTION( !( procs_per_region_.size()==num_total_regions_ ), Exceptions::RuntimeError, "PID: "<getRank()<<" - Number of 
regions detected does not match with the initially declared one \n procs_per_region_ tracks "<getRank() << " - Number of regions detected does not match with the initially declared one \n procs_per_region_ tracks " << procs_per_region_.size() << " regions whereas num_total_regions_ = " << num_total_regions_ << "\n"); } - //This is easy: if the number of regions coincides with the total number of processes instantiated, then - //a one-to-one relation between processes and regions is created - else if( tot_num_proc == num_total_regions_ ) - { - regions_per_proc_.push_back( myPID+1 ); - - for( int i = 0; i proc; proc.clear(); proc.push_back(i); std::tuple > tuple_aux = std::make_tuple(region_index, proc); - procs_per_region_.push_back( tuple_aux ); + procs_per_region_.push_back(tuple_aux); } } - //If the number of processes exceeds the number of regions in the domain, - //then each process is given a subset of a region. - //N.B.: A SINGLE PROCESS IS NOT ALLOWED TO OWN CHUNCKS OF MULTIPLE REGIONS. - //IN THIS CONFIGURATION EACH PROCESS IS CONFINED TO A SINGLE REGION - else if( tot_num_proc > num_total_regions_ ) - { - int num_procs_region = std::ceil( static_cast(tot_num_proc)/static_cast(num_total_regions_) ); + // If the number of processes exceeds the number of regions in the domain, + // then each process is given a subset of a region. + // N.B.: A SINGLE PROCESS IS NOT ALLOWED TO OWN CHUNCKS OF MULTIPLE REGIONS. 
+ // IN THIS CONFIGURATION EACH PROCESS IS CONFINED TO A SINGLE REGION + else if (tot_num_proc > num_total_regions_) { + int num_procs_region = std::ceil(static_cast(tot_num_proc) / static_cast(num_total_regions_)); int num_regions_extra_proc = tot_num_proc % num_total_regions_; - int proc_count = 0; + int proc_count = 0; std::tuple > region_tuple; - for( int i = 1; i<=num_total_regions_; ++i ) - { + for (int i = 1; i <= num_total_regions_; ++i) { Array procs; procs.clear(); - if( i<=num_regions_extra_proc || num_regions_extra_proc==0 ) - for( int j=1; j<=num_procs_region; ++j ) - { + if (i <= num_regions_extra_proc || num_regions_extra_proc == 0) + for (int j = 1; j <= num_procs_region; ++j) { procs.push_back(proc_count); proc_count++; } else - for( int j=1; j<=num_procs_region-1; ++j ) - { + for (int j = 1; j <= num_procs_region - 1; ++j) { procs.push_back(proc_count); proc_count++; } @@ -244,13 +224,11 @@ void RegionHandler::ComputeProcRegion } } - -//This routine associates a globally indexed node with the list of regions it belongs to -//This is helpful to spot which nodes lie on a interregion interface. In fact, these nodes must have -//the list of regions with more than one element +// This routine associates a globally indexed node with the list of regions it belongs to +// This is helpful to spot which nodes lie on a interregion interface. 
In fact, these nodes must have +// the list of regions with more than one element template -void RegionHandler::NodesToRegion() -{ +void RegionHandler::NodesToRegion() { nodesToRegion_.clear(); interfaceNodes_.clear(); Array > nodes_reordered; @@ -260,29 +238,24 @@ void RegionHandler::NodesToRegion() typename Array >::iterator node_iterator; node_iterator = nodes_reordered.begin(); - while( node_iterator != nodes_reordered.end() ) - { - GlobalOrdinal current_node = std::get<0>( *(node_iterator) ); + while (node_iterator != nodes_reordered.end()) { + GlobalOrdinal current_node = std::get<0>(*(node_iterator)); Array regions; regions.clear(); - regions.push_back( std::get<1>(*node_iterator) ); + regions.push_back(std::get<1>(*node_iterator)); typename Array >::iterator next_node_iterator = node_iterator + 1; - while( next_node_iterator != nodes_reordered.end() ) - { - GlobalOrdinal next_node = std::get<0>( *(next_node_iterator) ); - if( current_node == next_node ) - { - //As long as the information spanned regards the same node, - //the algorithm keeps on increasing the list of regions the given mesh node belong to - regions.push_back( std::get<1>( *(next_node_iterator) ) ); + while (next_node_iterator != nodes_reordered.end()) { + GlobalOrdinal next_node = std::get<0>(*(next_node_iterator)); + if (current_node == next_node) { + // As long as the information spanned regards the same node, + // the algorithm keeps on increasing the list of regions the given mesh node belong to + regions.push_back(std::get<1>(*(next_node_iterator))); next_node_iterator++; - } - else - { - //When the mesh node label changes, then the algorithm - //stops recording information about the previous node and it starts recording information for the new one + } else { + // When the mesh node label changes, then the algorithm + // stops recording information about the previous node and it starts recording information for the new one node_iterator = next_node_iterator; break; } @@ -292,24 +265,24 
@@ void RegionHandler::NodesToRegion() new_tuple = std::make_tuple(current_node, regions); nodesToRegion_.push_back(new_tuple); - if( regions.size()>1 ) + if (regions.size() > 1) interfaceNodes_.push_back(new_tuple); - if( next_node_iterator == nodes_reordered.end() ) + if (next_node_iterator == nodes_reordered.end()) break; } - TEUCHOS_TEST_FOR_EXCEPTION( !( nodesToRegion_.size()==num_total_nodes_ ), Exceptions::RuntimeError, "Number of nodes detected does not match with the initially declared one \n"<<"nodesToRegion tracks "< -void RegionHandler::CreateRowMaps() -{ - TEUCHOS_TEST_FOR_EXCEPTION( ( procs_per_region_.empty() && regions_per_proc_.empty() ), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Information about region partitioning across processors is not consistent: incorrect values for number of processors or number of regions \n"); +void RegionHandler::CreateRowMaps() { + TEUCHOS_TEST_FOR_EXCEPTION((procs_per_region_.empty() && regions_per_proc_.empty()), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Information about region partitioning across processors is not consistent: incorrect values for number of processors or number of regions \n"); Array elements; Array region_elements; Array > elements_per_region; - Array > > regionToAll; + Array > > regionToAll; int myPID = comm_->getRank(); elements.clear(); @@ -319,285 +292,258 @@ void RegionHandler::CreateRowMaps() regionToAll.clear(); regionToAll.resize(num_total_regions_); - TEUCHOS_TEST_FOR_EXCEPTION( !nodes_sorted_by_regions_, Exceptions::RuntimeError, "Nodes are not sorted by regions in ascending order \n"); - TEUCHOS_TEST_FOR_EXCEPTION( num_total_nodes_>nodes_.size(), Exceptions::RuntimeError, "Number of nodes declared in input file does not match with the effective number of nodes provided\n"<<"num_total_nodes_ ="<( *(nodes_.end()-1) ), Exceptions::RuntimeError, "Number of regions declared in input file does not match with the effective number of regions provided\n"); 
+ TEUCHOS_TEST_FOR_EXCEPTION(!nodes_sorted_by_regions_, Exceptions::RuntimeError, "Nodes are not sorted by regions in ascending order \n"); + TEUCHOS_TEST_FOR_EXCEPTION(num_total_nodes_ > nodes_.size(), Exceptions::RuntimeError, "Number of nodes declared in input file does not match with the effective number of nodes provided\n" + << "num_total_nodes_ =" << num_total_nodes_ << " whereas nodes_ tracks " << nodes_.size() << " nodes \n"); + TEUCHOS_TEST_FOR_EXCEPTION(num_total_regions_ != std::get<1>(*(nodes_.end() - 1)), Exceptions::RuntimeError, "Number of regions declared in input file does not match with the effective number of regions provided\n"); - if( !( regions_per_proc_.empty() ) ) - { + if (!(regions_per_proc_.empty())) { typename Array::iterator iter_array; - for( iter_array=regions_per_proc_.begin(); iter_array!=regions_per_proc_.end(); ++iter_array ) - { + for (iter_array = regions_per_proc_.begin(); iter_array != regions_per_proc_.end(); ++iter_array) { region_elements.clear(); checkerNode unaryPredicate(*iter_array); - typename Array< std::tuple >::iterator nodes_iterator1; - typename Array< std::tuple >::iterator nodes_iterator2; + typename Array >::iterator nodes_iterator1; + typename Array >::iterator nodes_iterator2; - //We position an iterator at the beginning of the information associated with the a region owned by the calling process - //and another iterator right at the end of the information - nodes_iterator1 = std::find_if >::iterator, checkerNode >(nodes_.begin(), nodes_.end(), unaryPredicate); - nodes_iterator2 = std::find_if_not >::iterator, checkerNode >(nodes_iterator1, nodes_.end(), unaryPredicate); + // We position an iterator at the beginning of the information associated with the a region owned by the calling process + // and another iterator right at the end of the information + nodes_iterator1 = std::find_if >::iterator, checkerNode >(nodes_.begin(), nodes_.end(), unaryPredicate); + nodes_iterator2 = std::find_if_not >::iterator, 
checkerNode >(nodes_iterator1, nodes_.end(), unaryPredicate); - //Coun the number of mesh nodes inside a region + // Coun the number of mesh nodes inside a region int num_region_nodes = nodes_iterator2 - nodes_iterator1; - typename Array< std::tuple >::iterator nodes_iterator_aux; + typename Array >::iterator nodes_iterator_aux; - //The policy assumes that in the input file the indexBase for the node label is 1 + // The policy assumes that in the input file the indexBase for the node label is 1 GlobalOrdinal region_node_label = 1; - for( nodes_iterator_aux=nodes_iterator1; nodes_iterator_aux!=nodes_iterator2; ++nodes_iterator_aux ) - { + for (nodes_iterator_aux = nodes_iterator1; nodes_iterator_aux != nodes_iterator2; ++nodes_iterator_aux) { GlobalOrdinal node = std::get<0>(*nodes_iterator_aux); checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - Array nodal_regions = std::get<1>(*nodes_to_region_iterator); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + Array nodal_regions = std::get<1>(*nodes_to_region_iterator); - //By default, I choose that a node is owned by the process associated with the region that shows up first in its list of beloning - //This guarantees that each row of the composite stiffness matrix is owned only by a single process, as Trilinos requires - if( *iter_array==nodal_regions[0] ) - elements.push_back( node ); + // By default, I choose that a node is owned by the process associated with the region that shows up first in its list of beloning + // This guarantees that each row of the composite stiffness matrix is owned only by a single process, as Trilinos requires + if 
(*iter_array == nodal_regions[0]) + elements.push_back(node); - //Nodes on the interface still belong to multiple regions, so - //it is important to keep track of this for the row maps of region matrices + // Nodes on the interface still belong to multiple regions, so + // it is important to keep track of this for the row maps of region matrices region_elements.push_back(region_node_label); - //If a process owns a region (or even a portion of it), we provide to it a map - //from region indices to composite indices for all the nodes inside that region, - //even if a specific node is not owned by the calling process - regionToAll[*iter_array-1].push_back( std::make_tuple( region_node_label,std::get<0>(*nodes_iterator_aux) ) ); + // If a process owns a region (or even a portion of it), we provide to it a map + // from region indices to composite indices for all the nodes inside that region, + // even if a specific node is not owned by the calling process + regionToAll[*iter_array - 1].push_back(std::make_tuple(region_node_label, std::get<0>(*nodes_iterator_aux))); region_node_label++; } - //C++ indexing starts from 0, so everything is shofted backward by one to make it consistent with programming language's policies - for( typename Array::iterator iter = region_elements.begin(); iter!=region_elements.end(); ++iter ) + // C++ indexing starts from 0, so everything is shofted backward by one to make it consistent with programming language's policies + for (typename Array::iterator iter = region_elements.begin(); iter != region_elements.end(); ++iter) *iter = *iter - 1; - elements_per_region[*iter_array-1] = region_elements; - TEUCHOS_TEST_FOR_EXCEPTION( ( num_region_nodes!=regionToAll[*iter_array-1].size() ), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - Number of region nodes does not match with number of nodes stored in regionToAll \n"<<"num_region_nodes= "<< num_region_nodes<<" whereas regionToAll["<<*iter_array-1<<"].size()= "<getRank() << " - Number of 
region nodes does not match with number of nodes stored in regionToAll \n" + << "num_region_nodes= " << num_region_nodes << " whereas regionToAll[" << *iter_array - 1 << "].size()= " << regionToAll[*iter_array - 1].size() << "\n"); } - TEUCHOS_TEST_FOR_EXCEPTION( ( num_total_regions_!=regionToAll.size() ), Exceptions::RuntimeError, "regionToAll size has been corrupted\n"<<"num_total_regions_ = "<getRank()<<" - Number of total regions does not match with regionHandler structures \n"); + TEUCHOS_TEST_FOR_EXCEPTION(!(procs_per_region_.size() == num_total_regions_), Exceptions::RuntimeError, "Process ID: " << comm_->getRank() << " - Number of total regions does not match with regionHandler structures \n"); Array region_procs; - while( !region_found ) - { + while (!region_found) { typename Array::iterator iter_proc; - for( GlobalOrdinal region_index=1; region_index<=procs_per_region_.size(); ++region_index ) - { - region_procs = std::get<1>(procs_per_region_[region_index-1]); - iter_proc = std::find( region_procs.begin(), region_procs.end(), myPID ); - if( iter_proc!=region_procs.end() ) - { - myRegion = region_index; + for (GlobalOrdinal region_index = 1; region_index <= procs_per_region_.size(); ++region_index) { + region_procs = std::get<1>(procs_per_region_[region_index - 1]); + iter_proc = std::find(region_procs.begin(), region_procs.end(), myPID); + if (iter_proc != region_procs.end()) { + myRegion = region_index; region_found = true; } } } - TEUCHOS_TEST_FOR_EXCEPTION( ( myRegion == -1 || !region_found ), Exceptions::RuntimeError, ( "Region containing PROC ID: "+ std::to_string(myPID) + " NOT FOUND \n" ) ); - region_procs = std::get<1>(procs_per_region_[myRegion-1]); + TEUCHOS_TEST_FOR_EXCEPTION((myRegion == -1 || !region_found), Exceptions::RuntimeError, ("Region containing PROC ID: " + std::to_string(myPID) + " NOT FOUND \n")); + region_procs = std::get<1>(procs_per_region_[myRegion - 1]); checkerNode unaryPredicate(myRegion); - typename Array< std::tuple 
>::iterator nodes_iterator1; - typename Array< std::tuple >::iterator nodes_iterator2; - nodes_iterator1 = std::find_if >::iterator, checkerNode >(nodes_.begin(), nodes_.end(), unaryPredicate); - nodes_iterator2 = std::find_if_not >::iterator, checkerNode >(nodes_iterator1, nodes_.end(), unaryPredicate); + typename Array >::iterator nodes_iterator1; + typename Array >::iterator nodes_iterator2; + nodes_iterator1 = std::find_if >::iterator, checkerNode >(nodes_.begin(), nodes_.end(), unaryPredicate); + nodes_iterator2 = std::find_if_not >::iterator, checkerNode >(nodes_iterator1, nodes_.end(), unaryPredicate); int num_region_nodes = nodes_iterator2 - nodes_iterator1; int num_region_procs = region_procs.size(); - if( num_region_nodes < num_region_procs ) - { + if (num_region_nodes < num_region_procs) { Array region_procs_reduced; region_procs_reduced.clear(); - for(int i = 0; i::iterator proc_iterator; proc_iterator = std::find::iterator, GlobalOrdinal>(region_procs_reduced.begin(), region_procs_reduced.end(), myPID); - if( proc_iterator!=region_procs_reduced.end() )//This reasoning works because the PROC ID for each region has been previously sorted in ascending order + if (proc_iterator != region_procs_reduced.end()) // This reasoning works because the PROC ID for each region has been previously sorted in ascending order { - GlobalOrdinal node = std::get<0>( *( nodes_iterator1+(proc_iterator-region_procs_reduced.begin()+1) ) ); - GlobalOrdinal region_node_label = proc_iterator-region_procs_reduced.begin()+1; + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + (proc_iterator - region_procs_reduced.begin() + 1))); + GlobalOrdinal region_node_label = proc_iterator - region_procs_reduced.begin() + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - Array 
nodal_regions = std::get<1>(*nodes_to_region_iterator); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + Array nodal_regions = std::get<1>(*nodes_to_region_iterator); - //The follolwing if statement is necessary to guarantee uniqueness of the RowMap used for the composite matrix - if( myRegion == nodal_regions[0] ) - elements.push_back( node ); + // The follolwing if statement is necessary to guarantee uniqueness of the RowMap used for the composite matrix + if (myRegion == nodal_regions[0]) + elements.push_back(node); - //Although a process does not own a row in the composite matrix, it may still happen that it owns the row - //from a region matrix perspective + // Although a process does not own a row in the composite matrix, it may still happen that it owns the row + // from a region matrix perspective region_elements.push_back(region_node_label); } - //If a process owns a region (or even a portion of it), we provide to it a map - //from region indices to composite indices for all the nodes inside that region, - //even if a specific node is not owned by the calling process - //If a process owns something of a region, then the process has a global view of who owns what for that region - //Although this may seem more information than what actually needed, it is important for the computation of the collapsing. - //If the collapsing is not calculated, then this structure actually overestimates what a process needs to know. 
- for( proc_iterator=region_procs_reduced.begin(); proc_iterator!=region_procs_reduced.end(); ++proc_iterator ) - { - GlobalOrdinal node = std::get<0>( *( nodes_iterator1+(proc_iterator-region_procs_reduced.begin()+1) ) ); - GlobalOrdinal region_node_label = proc_iterator-region_procs_reduced.begin()+1; + // If a process owns a region (or even a portion of it), we provide to it a map + // from region indices to composite indices for all the nodes inside that region, + // even if a specific node is not owned by the calling process + // If a process owns something of a region, then the process has a global view of who owns what for that region + // Although this may seem more information than what actually needed, it is important for the computation of the collapsing. + // If the collapsing is not calculated, then this structure actually overestimates what a process needs to know. + for (proc_iterator = region_procs_reduced.begin(); proc_iterator != region_procs_reduced.end(); ++proc_iterator) { + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + (proc_iterator - region_procs_reduced.begin() + 1))); + GlobalOrdinal region_node_label = proc_iterator - region_procs_reduced.begin() + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - regionToAll[myRegion-1].push_back( std::make_tuple(region_node_label, node) ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + regionToAll[myRegion - 1].push_back(std::make_tuple(region_node_label, node)); } - } - else if( num_region_nodes == num_region_procs ) - { + } else if (num_region_nodes == num_region_procs) { typename Array::iterator proc_iterator; proc_iterator 
= std::find::iterator, GlobalOrdinal>(region_procs.begin(), region_procs.end(), myPID); - if( proc_iterator!=region_procs.end() )//This reasoning works because the PROC ID for each region has been previously sorted in ascending order + if (proc_iterator != region_procs.end()) // This reasoning works because the PROC ID for each region has been previously sorted in ascending order { - GlobalOrdinal node = std::get<0>( *( nodes_iterator1+(proc_iterator-region_procs.begin()+1) ) ); - GlobalOrdinal region_node_label = proc_iterator-region_procs.begin()+1; + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + (proc_iterator - region_procs.begin() + 1))); + GlobalOrdinal region_node_label = proc_iterator - region_procs.begin() + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - Array nodal_regions = std::get<1>(*nodes_to_region_iterator); - if( myRegion == nodal_regions[0] ) - elements.push_back( node ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + Array nodal_regions = std::get<1>(*nodes_to_region_iterator); + if (myRegion == nodal_regions[0]) + elements.push_back(node); region_elements.push_back(region_node_label); } - //If a process owns a region (or even a portion of it), we provide to it a map - //from region indices to composite indices for all the nodes inside that region, - //even if a specific node is not owned by the calling process - for( proc_iterator=region_procs.begin(); proc_iterator!=region_procs.end(); ++proc_iterator ) - { - GlobalOrdinal node = std::get<0>( *( nodes_iterator1+(proc_iterator-region_procs.begin()+1) ) ); - GlobalOrdinal region_node_label = 
proc_iterator-region_procs.begin()+1; + // If a process owns a region (or even a portion of it), we provide to it a map + // from region indices to composite indices for all the nodes inside that region, + // even if a specific node is not owned by the calling process + for (proc_iterator = region_procs.begin(); proc_iterator != region_procs.end(); ++proc_iterator) { + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + (proc_iterator - region_procs.begin() + 1))); + GlobalOrdinal region_node_label = proc_iterator - region_procs.begin() + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - regionToAll[myRegion-1].push_back( std::make_tuple(region_node_label, node) ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + regionToAll[myRegion - 1].push_back(std::make_tuple(region_node_label, node)); } - } - else - { + } else { typename Array::iterator proc_iterator; proc_iterator = std::find::iterator, GlobalOrdinal>(region_procs.begin(), region_procs.end(), myPID); - int num_nodes_proc = std::ceil( static_cast(num_region_nodes)/static_cast(num_region_procs) ); + int num_nodes_proc = std::ceil(static_cast(num_region_nodes) / static_cast(num_region_procs)); int num_procs_extra_node = num_region_nodes % num_region_procs; - if( proc_iterator-region_procs.begin()+1 <= num_procs_extra_node || num_procs_extra_node == 0 ) - { - int init_node = num_nodes_proc * ( proc_iterator-region_procs.begin() ); - for( int i=0; i( *( nodes_iterator1 + init_node + i ) ); + if (proc_iterator - region_procs.begin() + 1 <= num_procs_extra_node || num_procs_extra_node == 0) { + int init_node = num_nodes_proc * (proc_iterator - 
region_procs.begin()); + for (int i = 0; i < num_nodes_proc; ++i) { + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + init_node + i)); GlobalOrdinal region_node_label = init_node + i + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - Array nodal_regions = std::get<1>(*nodes_to_region_iterator); - if( myRegion == nodal_regions[0] ) - elements.push_back( node ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + Array nodal_regions = std::get<1>(*nodes_to_region_iterator); + if (myRegion == nodal_regions[0]) + elements.push_back(node); region_elements.push_back(region_node_label); } - } - else - { - int init_node = num_nodes_proc * num_procs_extra_node + (proc_iterator - region_procs.begin() - num_procs_extra_node) * (num_nodes_proc-1); - for( int i=0; i( *( nodes_iterator1 + init_node + i ) ); + } else { + int init_node = num_nodes_proc * num_procs_extra_node + (proc_iterator - region_procs.begin() - num_procs_extra_node) * (num_nodes_proc - 1); + for (int i = 0; i < num_nodes_proc - 1; ++i) { + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + init_node + i)); GlobalOrdinal region_node_label = init_node + i + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - Array nodal_regions = std::get<1>(*nodes_to_region_iterator); - if( myRegion == nodal_regions[0] ) - elements.push_back( node ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = 
std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + Array nodal_regions = std::get<1>(*nodes_to_region_iterator); + if (myRegion == nodal_regions[0]) + elements.push_back(node); region_elements.push_back(region_node_label); } } - //If a process owns a region (or even a portion of it), we provide to it a map - //from region indices to composite indices for all the nodes inside that region, - //even if a specific node is not owned by the calling process - for( proc_iterator=region_procs.begin(); proc_iterator!=region_procs.end(); ++proc_iterator ) - { - if( proc_iterator-region_procs.begin()+1 <= num_procs_extra_node || num_procs_extra_node == 0 ) - { - int init_node = num_nodes_proc * ( proc_iterator-region_procs.begin() ); - for( int i=0; i( *( nodes_iterator1 + init_node + i ) ); + // If a process owns a region (or even a portion of it), we provide to it a map + // from region indices to composite indices for all the nodes inside that region, + // even if a specific node is not owned by the calling process + for (proc_iterator = region_procs.begin(); proc_iterator != region_procs.end(); ++proc_iterator) { + if (proc_iterator - region_procs.begin() + 1 <= num_procs_extra_node || num_procs_extra_node == 0) { + int init_node = num_nodes_proc * (proc_iterator - region_procs.begin()); + for (int i = 0; i < num_nodes_proc; ++i) { + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + init_node + i)); GlobalOrdinal region_node_label = init_node + i + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - regionToAll[myRegion-1].push_back( std::make_tuple(region_node_label, node) ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, 
checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + regionToAll[myRegion - 1].push_back(std::make_tuple(region_node_label, node)); } - } - else - { - int init_node = num_nodes_proc * num_procs_extra_node + (proc_iterator - region_procs.begin() - num_procs_extra_node) * (num_nodes_proc-1); - for( int i=0; i( *( nodes_iterator1 + init_node + i ) ); + } else { + int init_node = num_nodes_proc * num_procs_extra_node + (proc_iterator - region_procs.begin() - num_procs_extra_node) * (num_nodes_proc - 1); + for (int i = 0; i < num_nodes_proc - 1; ++i) { + GlobalOrdinal node = std::get<0>(*(nodes_iterator1 + init_node + i)); GlobalOrdinal region_node_label = init_node + i + 1; checkerNodesToRegion unaryPredicateNode(node); - typename Array< std::tuple > >::iterator nodes_to_region_iterator; - nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); - regionToAll[myRegion-1].push_back( std::make_tuple(region_node_label, node) ); + typename Array > >::iterator nodes_to_region_iterator; + nodes_to_region_iterator = std::find_if > >::iterator, checkerNodesToRegion >(nodesToRegion_.begin(), nodesToRegion_.end(), unaryPredicateNode); + regionToAll[myRegion - 1].push_back(std::make_tuple(region_node_label, node)); } } } - } - TEUCHOS_TEST_FOR_EXCEPTION( ( num_total_regions_!=regionToAll.size() ), Exceptions::RuntimeError, "Process ID: "<getRank()<<" - regionToAll size has been corrupted\n"<<"num_total_regions_ = "<getRank() << " - regionToAll size has been corrupted\n" + << "num_total_regions_ = " << num_total_regions_ << " whereas regionToAll.size()= " << regionToAll.size() << "\n"); - //C++ indexing starts from 0, so everything is shofted backward by one to make it consistent with programming language's policies - for( typename Array::iterator iter = region_elements.begin(); iter!=region_elements.end(); ++iter ) + // C++ indexing starts from 0, so 
everything is shofted backward by one to make it consistent with programming language's policies + for (typename Array::iterator iter = region_elements.begin(); iter != region_elements.end(); ++iter) *iter = *iter - 1; - elements_per_region[myRegion-1] = region_elements; + elements_per_region[myRegion - 1] = region_elements; } - //C++ indexing starts from 0, so everything is shofted backward by one to make it consistent with programming language's policies - for( typename Array::iterator iter = elements.begin(); iter!=elements.end(); ++iter ) + // C++ indexing starts from 0, so everything is shofted backward by one to make it consistent with programming language's policies + for (typename Array::iterator iter = elements.begin(); iter != elements.end(); ++iter) *iter = *iter - 1; maps_.composite_map_ = elements; - maps_.region_maps_ = elements_per_region; - maps_.regionToAll_ = regionToAll; + maps_.region_maps_ = elements_per_region; + maps_.regionToAll_ = regionToAll; - for (int i = 0; i < comm_->getSize(); ++i) - { + for (int i = 0; i < comm_->getSize(); ++i) { comm_->barrier(); - if (comm_->getRank() == i) - { + if (comm_->getRank() == i) { std::cout << "Proc " << i << std::endl; - for (int i = 0; i < regionToAll.size(); ++i) - { - Teuchos::Array > currArray = regionToAll[i]; - for (int j = 0; j < currArray.size(); ++j) - { + for (int i = 0; i < regionToAll.size(); ++i) { + Teuchos::Array > currArray = regionToAll[i]; + for (int j = 0; j < currArray.size(); ++j) { std::cout << std::get<0>(currArray[j]) << "\t" << std::get<1>(currArray[j]) << std::endl; } } @@ -607,68 +553,57 @@ void RegionHandler::CreateRowMaps() } } - // Get methods to allow a user to interface with private members of the RegionHandler class template -Array RegionHandler::GetRegionRowMap(GlobalOrdinal region_index)const -{ - TEUCHOS_TEST_FOR_EXCEPTION( region_index>=num_total_regions_, Exceptions::RuntimeError, "Value of region index exceeds total number of regions stored \n"<<"Trying to access 
informaiton about region "< RegionHandler::GetRegionRowMap(GlobalOrdinal region_index) const { + TEUCHOS_TEST_FOR_EXCEPTION(region_index >= num_total_regions_, Exceptions::RuntimeError, "Value of region index exceeds total number of regions stored \n" + << "Trying to access informaiton about region " << region_index << " when the total number of regions stored is " << num_total_regions_ << "\n"); return maps_.region_maps_[region_index]; } - template -Array > > RegionHandler::GetRegionToAll()const -{ +Array > > RegionHandler::GetRegionToAll() const { return maps_.regionToAll_; } - template -Array > RegionHandler::GetRegionToAll(GlobalOrdinal region_index)const -{ - TEUCHOS_TEST_FOR_EXCEPTION( region_index>=num_total_regions_, Exceptions::RuntimeError, "Value of region index exceeds total number of regions stored \n"<<"Trying to access informaiton about region "< > RegionHandler::GetRegionToAll(GlobalOrdinal region_index) const { + TEUCHOS_TEST_FOR_EXCEPTION(region_index >= num_total_regions_, Exceptions::RuntimeError, "Value of region index exceeds total number of regions stored \n" + << "Trying to access informaiton about region " << region_index << " when the total number of regions stored is " << num_total_regions_ << "\n"); return maps_.regionToAll_[region_index]; } template -void RegionHandler::printView() const -{ - if( 0==comm_->getRank() ) - { - std::cout<<"Total number of mesh nodes: "<(nodes_[i]) <<"\t"<< std::get<1>(nodes_[i]) <::printView() const { + if (0 == comm_->getRank()) { + std::cout << "Total number of mesh nodes: " << num_total_nodes_ << std::endl; + std::cout << "Total number of mesh regions: " << num_total_regions_ << std::endl; + std::cout << "Number of rows in nodes_ structure: " << nodes_.size() << std::endl; + for (int i = 0; i < nodes_.size(); ++i) { + std::cout << std::get<0>(nodes_[i]) << "\t" << std::get<1>(nodes_[i]) << std::endl; } } } -//Print methods +// Print methods template -void RegionHandler::printNodesToRegion() const -{ - 
if( 0==comm_->getRank() ) - { - std::cout<<"Total number of mesh nodes: "<(nodesToRegion_[i]) <<"\t belongs to regions: "<< std::get<1>(nodesToRegion_[i]) <::printNodesToRegion() const { + if (0 == comm_->getRank()) { + std::cout << "Total number of mesh nodes: " << num_total_nodes_ << std::endl; + std::cout << "Total number of mesh regions: " << num_total_regions_ << std::endl; + std::cout << "Number of rows in nodes_ structure: " << nodes_.size() << std::endl; + for (int i = 0; i < nodesToRegion_.size(); ++i) { + std::cout << "Node " << std::get<0>(nodesToRegion_[i]) << "\t belongs to regions: " << std::get<1>(nodesToRegion_[i]) << std::endl; } } } template -void RegionHandler::printInactive() const -{ - if( maps_.composite_map_.empty() ) - std::cout<<"INACTIVE PROC ID: "<getRank()<::printInactive() const { + if (maps_.composite_map_.empty()) + std::cout << "INACTIVE PROC ID: " << comm_->getRank() << std::endl; } -} //namespace Xpetra +} // namespace Xpetra #endif diff --git a/packages/muelu/research/q2q1/MueLu_Q2Q1PFactory.hpp b/packages/muelu/research/q2q1/MueLu_Q2Q1PFactory.hpp index 53865418746d..5db995886422 100644 --- a/packages/muelu/research/q2q1/MueLu_Q2Q1PFactory.hpp +++ b/packages/muelu/research/q2q1/MueLu_Q2Q1PFactory.hpp @@ -60,182 +60,180 @@ namespace MueLu { - template - class Q2Q1PFactory : public PFactory { +template +class Q2Q1PFactory : public PFactory { #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor - Q2Q1PFactory() { } + //! Constructor + Q2Q1PFactory() {} - //! Destructor. - virtual ~Q2Q1PFactory() { } - //@} + //! Destructor. + virtual ~Q2Q1PFactory() {} + //@} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& fineLevel, Level& coarseLevel) const; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const; - //@} + //@} - //! 
@name Build methods. - //@{ + //! @name Build methods. + //@{ - void Build (Level& fineLevel, Level& coarseLevel) const; - void BuildP(Level& fineLevel, Level& coarseLevel) const; + void Build(Level& fineLevel, Level& coarseLevel) const; + void BuildP(Level& fineLevel, Level& coarseLevel) const; - //@} - }; + //@} +}; - template - RCP Q2Q1PFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP Q2Q1PFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); - return validParamList; - } + return validParamList; +} - template - void Q2Q1PFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - Input(fineLevel, "A"); - } +template +void Q2Q1PFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + Input(fineLevel, "A"); +} - template - void Q2Q1PFactory::Build(Level& fineLevel, Level& coarseLevel) const { - return BuildP(fineLevel, coarseLevel); - } +template +void Q2Q1PFactory::Build(Level& fineLevel, Level& coarseLevel) const { + return BuildP(fineLevel, coarseLevel); +} - template - void Q2Q1PFactory::BuildP(Level& fineLevel, Level& coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); +template +void Q2Q1PFactory::BuildP(Level& fineLevel, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - RCP A = Get< RCP >(fineLevel, "A"); + RCP A = Get >(fineLevel, "A"); - RCP rowMap = A->getRowMap(); + RCP rowMap = A->getRowMap(); - Xpetra::global_size_t N = rowMap->getGlobalNumElements(); + Xpetra::global_size_t N = rowMap->getGlobalNumElements(); - int V; - size_t n = as(sqrt(N)); - if (N == n*n) { - // pressure mode - V = 1; - GetOStream(Runtime1) << "Pressure mode" << std::endl; + int V; + size_t n = as(sqrt(N)); + if (N == n * n) { + // pressure mode + V = 
1; + GetOStream(Runtime1) << "Pressure mode" << std::endl; + } else { + n = as(sqrt(N / 2)); + + if (N == 2 * n * n) { + // velocity mode + V = 2; + GetOStream(Runtime1) << "Velocity mode" << std::endl; } else { - n = as(sqrt(N/2)); - - if (N == 2*n*n) { - // velocity mode - V = 2; - GetOStream(Runtime1) << "Velocity mode" << std::endl; - } else { - throw Exceptions::RuntimeError("Matrix size (" + toString(N) + ") is incompatible with both velocity and pressure"); - } + throw Exceptions::RuntimeError("Matrix size (" + toString(N) + ") is incompatible with both velocity and pressure"); } + } + + const int C = 4; + Xpetra::global_size_t nc = (n - 1) / C + 1; + TEUCHOS_TEST_FOR_EXCEPTION(C * (nc - 1) + 1 != n, Exceptions::InvalidArgument, "Incorrect dim size: " << n); - const int C = 4; - Xpetra::global_size_t nc = (n-1)/C + 1; - TEUCHOS_TEST_FOR_EXCEPTION(C*(nc-1)+1 != n, Exceptions::InvalidArgument, "Incorrect dim size: " << n); + ArrayView elementList = rowMap->getLocalElementList(); + GO indexBase = rowMap->getIndexBase(); + // Calculate offsets + GO offset = (V == 2 ? 0 : 2 * (2 * n - 1) * (2 * n - 1)); + GO coarseOffset = (V == 2 ? 0 : 2 * (2 * nc - 1) * (2 * nc - 1)); - ArrayView elementList = rowMap->getLocalElementList(); - GO indexBase = rowMap->getIndexBase(); + GetOStream(Runtime1) << "offset = " << offset << ", coarseOffset = " << coarseOffset << std::endl; - // Calculate offsets - GO offset = (V == 2 ? 0 : 2*(2*n -1)*(2*n -1)); - GO coarseOffset = (V == 2 ? 
0 : 2*(2*nc-1)*(2*nc-1)); + Array coarseList; + for (LO k = 0; k < elementList.size(); k += V) { + GO GID = elementList[k] - offset - indexBase; + GO i = (GID / V) % n, ii = i / C; + GO j = (GID / V) / n, jj = j / C; - GetOStream(Runtime1) << "offset = " << offset << ", coarseOffset = " << coarseOffset << std::endl; + if (i % C == 0 && j % C == 0) + for (int q = 0; q < V; q++) + coarseList.push_back(V * (jj * nc + ii) + q + coarseOffset); + } - Array coarseList; - for (LO k = 0; k < elementList.size(); k += V) { - GO GID = elementList[k] - offset - indexBase; - GO i = (GID / V) % n, ii = i/C; - GO j = (GID / V) / n, jj = j/C; + typedef Teuchos::ScalarTraits STS; + SC one = STS::one(); + + Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); + std::vector stridingInfo(1, 1); + const int stridedBlockId = -1; + RCP coarseMap = StridedMapFactory ::Build(rowMap->lib(), INVALID, coarseList, indexBase, stridingInfo, rowMap->getComm(), stridedBlockId, coarseOffset); + RCP coarseNullspace = MultiVectorFactory::Build(coarseMap, 1); + coarseNullspace->putScalar(one); + + int nnzEstimate = 4; + RCP P = MatrixFactory::Build(rowMap, coarseMap, nnzEstimate); + + Array inds(nnzEstimate), inds1(nnzEstimate); + Array vals(nnzEstimate, one); + int sz; + for (LO k = 0; k < elementList.size(); k += V) { + GO GID = elementList[k] - offset - indexBase; + GO i = (GID / V) % n, ii = i / C; + GO j = (GID / V) / n, jj = j / C; + + if (i % C == 0 && j % C == 0) { + sz = 1; + inds[0] = jj * nc + ii; + + } else if (i % C == 0 && j % C != 0) { + sz = 2; + inds[0] = jj * nc + ii; + inds[1] = (jj + 1) * nc + ii; + + } else if (i % C != 0 && j % C == 0) { + sz = 2; + inds[0] = jj * nc + ii; + inds[1] = jj * nc + ii + 1; - if (i % C == 0 && j % C == 0) - for (int q = 0; q < V; q++) - coarseList.push_back(V*(jj*nc + ii) + q + coarseOffset); + } else { + sz = 4; + inds[0] = jj * nc + ii; + inds[1] = jj * nc + ii + 1; + inds[2] = (jj + 1) * nc + ii; + inds[3] = (jj + 1) * nc + ii + 1; } - 
typedef Teuchos::ScalarTraits STS; - SC one = STS::one(); - - Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); - std::vector stridingInfo(1,1); - const int stridedBlockId = -1; - RCP coarseMap = StridedMapFactory ::Build(rowMap->lib(), INVALID, coarseList, indexBase, stridingInfo, rowMap->getComm(), stridedBlockId, coarseOffset); - RCP coarseNullspace = MultiVectorFactory::Build(coarseMap, 1); - coarseNullspace->putScalar(one); - - int nnzEstimate = 4; - RCP P = MatrixFactory::Build(rowMap, coarseMap, nnzEstimate); - - Array inds(nnzEstimate), inds1(nnzEstimate); - Array vals(nnzEstimate, one); - int sz; - for (LO k = 0; k < elementList.size(); k += V) { - GO GID = elementList[k] - offset - indexBase; - GO i = (GID/V) % n, ii = i/C; - GO j = (GID/V) / n, jj = j/C; - - if (i % C == 0 && j % C == 0) { - sz = 1; - inds[0] = jj *nc + ii ; - - } else if (i % C == 0 && j % C != 0) { - sz = 2; - inds[0] = jj *nc + ii ; - inds[1] = (jj+1)*nc + ii ; - - } else if (i % C != 0 && j % C == 0) { - sz = 2; - inds[0] = jj *nc + ii ; - inds[1] = jj *nc + ii+1; - - } else { - sz = 4; - inds[0] = jj *nc + ii ; - inds[1] = jj *nc + ii+1; - inds[2] = (jj+1)*nc + ii ; - inds[3] = (jj+1)*nc + ii+1; - } - - for (int q = 0; q < V; q++) { - for (int p = 0; p < sz; p++) - inds1[p] = V*inds[p]+q + coarseOffset; - - P->insertGlobalValues(elementList[k]+q, inds1.view(0,sz), vals.view(0,sz)); - } - } + for (int q = 0; q < V; q++) { + for (int p = 0; p < sz; p++) + inds1[p] = V * inds[p] + q + coarseOffset; - P->fillComplete(coarseMap, A->getDomainMap()); + P->insertGlobalValues(elementList[k] + q, inds1.view(0, sz), vals.view(0, sz)); + } + } - // Level Set - Set(coarseLevel, "Nullspace", coarseNullspace); - Set(coarseLevel, "P", P); - Set(fineLevel, "CoarseMap", coarseMap); + P->fillComplete(coarseMap, A->getDomainMap()); - if (IsPrint(Statistics1)) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); 
- GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params); - } + // Level Set + Set(coarseLevel, "Nullspace", coarseNullspace); + Set(coarseLevel, "P", P); + Set(fineLevel, "CoarseMap", coarseMap); + if (IsPrint(Statistics1)) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params); } +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_Q2Q1PFACTORY_DECL_HPP +#endif // MUELU_Q2Q1PFACTORY_DECL_HPP diff --git a/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp b/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp index a98daccd10f4..dc3f51a0e0b9 100644 --- a/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp +++ b/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp @@ -68,703 +68,695 @@ namespace MueLu { - template - class MyCptList_ { - typedef LocalOrdinal LO; +template +class MyCptList_ { + typedef LocalOrdinal LO; - public: - MyCptList_(int n, int nnzPerRow = 100) { - TEUCHOS_TEST_FOR_EXCEPTION(nnzPerRow <= 0, Exceptions::RuntimeError, "Why is nnzPerRow " << nnzPerRow << "?"); + public: + MyCptList_(int n, int nnzPerRow = 100) { + TEUCHOS_TEST_FOR_EXCEPTION(nnzPerRow <= 0, Exceptions::RuntimeError, "Why is nnzPerRow " << nnzPerRow << "?"); - nnzPerRow_ = nnzPerRow; - storage_.resize(n * nnzPerRow); - numCpts_.resize(n, 0); - - list_.resize(n, NULL); - for (int i = 0; i < n; i++) - list_[i] = &storage_[i*nnzPerRow]; - } - - size_t getLocalNumRows() const { return list_.size(); } - int getNnzPerRow() const { return nnzPerRow_; } - std::vector& getNumCpts() { return numCpts_; } - Teuchos::Array& getCList() { return cptlist_; } - const Teuchos::Array& getCList() const { return cptlist_; } - const std::vector& getNumCpts() const { return numCpts_; } - LO* operator()(int i) { return list_[i]; } - const LO* operator()(int i) const { return list_[i]; } - - private: - std::vector list_; // list[k] 
gives the CPOINTs that interpolate to the k-th fine point - // These CPOINTs are given as fine grid *local* indices - std::vector numCpts_; // Number of CPOINTs for each point - Teuchos::Array cptlist_; // List of CPOINTs for each point - int nnzPerRow_; // Max number of CPOINTs per row in order for a row to use storage_ - std::vector storage_; // Large data array used to store most CPOINT information - }; + nnzPerRow_ = nnzPerRow; + storage_.resize(n * nnzPerRow); + numCpts_.resize(n, 0); + list_.resize(n, NULL); + for (int i = 0; i < n; i++) + list_[i] = &storage_[i * nnzPerRow]; + } - template - class Q2Q1uPFactory : public PFactory { + size_t getLocalNumRows() const { return list_.size(); } + int getNnzPerRow() const { return nnzPerRow_; } + std::vector& getNumCpts() { return numCpts_; } + Teuchos::Array& getCList() { return cptlist_; } + const Teuchos::Array& getCList() const { return cptlist_; } + const std::vector& getNumCpts() const { return numCpts_; } + LO* operator()(int i) { return list_[i]; } + const LO* operator()(int i) const { return list_[i]; } + + private: + std::vector list_; // list[k] gives the CPOINTs that interpolate to the k-th fine point + // These CPOINTs are given as fine grid *local* indices + std::vector numCpts_; // Number of CPOINTs for each point + Teuchos::Array cptlist_; // List of CPOINTs for each point + int nnzPerRow_; // Max number of CPOINTs per row in order for a row to use storage_ + std::vector storage_; // Large data array used to store most CPOINT information +}; + +template +class Q2Q1uPFactory : public PFactory { #include "MueLu_UseShortNames.hpp" - typedef MyCptList_ MyCptList; - - private: - enum Status { - UNASSIGNED = '0', - CANDIDATE = '1', - FPOINT = '2', - TWOTIMER = '3', - CPOINT = '4', - CPOINT_U = '5' - }; + typedef MyCptList_ MyCptList; + + private: + enum Status { + UNASSIGNED = '0', + CANDIDATE = '1', + FPOINT = '2', + TWOTIMER = '3', + CPOINT = '4', + CPOINT_U = '5' + }; std::string getStatusString(char 
status) const { switch (status) { case UNASSIGNED: return "UNASSIGNED"; - case CANDIDATE : return "CANDIDATE"; - case FPOINT : return "FPOINT"; - case TWOTIMER : return "TWOTIMER"; - case CPOINT : return "CPOINT"; - case CPOINT_U : return "CPOINT_U"; - default: return "UNKNOWN"; + case CANDIDATE: return "CANDIDATE"; + case FPOINT: return "FPOINT"; + case TWOTIMER: return "TWOTIMER"; + case CPOINT: return "CPOINT"; + case CPOINT_U: return "CPOINT_U"; + default: return "UNKNOWN"; } } - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor - Q2Q1uPFactory() { } + //! Constructor + Q2Q1uPFactory() {} - //! Destructor. - virtual ~Q2Q1uPFactory() { } - //@} + //! Destructor. + virtual ~Q2Q1uPFactory() {} + //@} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& fineLevel, Level& coarseLevel) const; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. 
+ //@{ - void Build (Level& fineLevel, Level& coarseLevel) const; - void BuildP(Level& fineLevel, Level& coarseLevel) const; + void Build(Level& fineLevel, Level& coarseLevel) const; + void BuildP(Level& fineLevel, Level& coarseLevel) const; - //@} + //@} - private: - void FindDist4Cpts (const Matrix& A, const MultiVector& coords, const Array& userCpts, std::vector& status, MyCptList& myCpts, int levelID) const; - void PhaseTwoPattern (const Matrix& A, const MultiVector& coords, const std::vector& status, MyCptList& myCpts) const; - void FindMidPoints (const Matrix& A, const MultiVector& coords, Array& Cptlist, const MyCptList& myCpts) const; - void CompDistances (const Matrix& A, LO start, int numDist, std::vector& dist1, std::vector& dist2, - std::vector& dist3, std::vector& dist4) const; - void CreateCrsPointers (const Matrix& A, ArrayRCP& ia, ArrayRCP& ja) const; - void CptDepends2Pattern(const Matrix& A, const MyCptList& myCpts, RCP& P, LO offset) const; + private: + void FindDist4Cpts(const Matrix& A, const MultiVector& coords, const Array& userCpts, std::vector& status, MyCptList& myCpts, int levelID) const; + void PhaseTwoPattern(const Matrix& A, const MultiVector& coords, const std::vector& status, MyCptList& myCpts) const; + void FindMidPoints(const Matrix& A, const MultiVector& coords, Array& Cptlist, const MyCptList& myCpts) const; + void CompDistances(const Matrix& A, LO start, int numDist, std::vector& dist1, std::vector& dist2, + std::vector& dist3, std::vector& dist4) const; + void CreateCrsPointers(const Matrix& A, ArrayRCP& ia, ArrayRCP& ja) const; + void CptDepends2Pattern(const Matrix& A, const MyCptList& myCpts, RCP& P, LO offset) const; - void DumpStatus(const std::string& filename, const std::vector& status, int NDim, bool isAmalgamated = true) const; - void DumpCoords(const std::string& filename, const MultiVector& coords) const; - }; + void DumpStatus(const std::string& filename, const std::vector& status, int NDim, bool isAmalgamated = 
true) const; + void DumpCoords(const std::string& filename, const MultiVector& coords) const; +}; - // Sort a double array and move along list2 to match sorted array - template - void Muelu_az_dsort2(std::vector& dlist, std::vector& list2) { - int l, r, j, i, flag; - int RR2; - SC dRR, dK; - - int N = dlist.size(); - if (N <= 1) return; - - l = N / 2 + 1; - r = N - 1; - l = l - 1; - dRR = dlist[l - 1]; - dK = dlist[l - 1]; - - if (list2.size()) { - RR2 = list2[l - 1]; - while (r != 0) { - j = l; - flag = 1; +// Sort a double array and move along list2 to match sorted array +template +void Muelu_az_dsort2(std::vector& dlist, std::vector& list2) { + int l, r, j, i, flag; + int RR2; + SC dRR, dK; + + int N = dlist.size(); + if (N <= 1) return; - while (flag == 1) { - i = j; - j = j + j; + l = N / 2 + 1; + r = N - 1; + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; - if (j > r + 1) + if (list2.size()) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (dlist[j] > dlist[j - 1]) j = j + 1; + + if (dlist[j - 1] > dK) { + dlist[i - 1] = dlist[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (dlist[j] > dlist[j - 1]) j = j + 1; - - if (dlist[j - 1] > dK) { - dlist[i - 1] = dlist[j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - dlist[i - 1] = dRR; - list2[i - 1] = RR2; - - if (l == 1) { - dRR = dlist[r]; - RR2 = list2[r]; - dK = dlist[r]; - dlist[r] = dlist[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[l - 1]; - RR2 = list2[l - 1]; - dK = dlist[l - 1]; - } } - dlist[0] = dRR; - list2[0] = RR2; + dlist[i - 1] = dRR; + list2[i - 1] = RR2; + + if (l == 1) { + dRR = dlist[r]; + RR2 = list2[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + RR2 = list2[l - 1]; + dK = dlist[l - 
1]; + } } - else { - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; - if (j > r + 1) + dlist[0] = dRR; + list2[0] = RR2; + } else { + while (r != 0) { + j = l; + flag = 1; + while (flag == 1) { + i = j; + j = j + j; + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (dlist[j] > dlist[j - 1]) j = j + 1; + if (dlist[j - 1] > dK) { + dlist[i - 1] = dlist[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (dlist[j] > dlist[j - 1]) j = j + 1; - if (dlist[j - 1] > dK) { - dlist[i - 1] = dlist[j - 1]; - } - else { - flag = 0; - } } } - dlist[i - 1] = dRR; - if (l == 1) { - dRR = dlist[r]; - dK = dlist[r]; - dlist[r] = dlist[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[l - 1]; - dK = dlist[l - 1]; - } } - dlist[0] = dRR; + dlist[i - 1] = dRR; + if (l == 1) { + dRR = dlist[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + } } + dlist[0] = dRR; } +} + +/* ******************************************************************* */ +/* sort an array and move along list2 and/or list to match sorted array*/ +/* ------------------------------------------------------------------- */ +template +void Muelu_az_sort(int list[], int N, int list2[], SC list3[]) { + int l, r, RR, K, j, i, flag; + int RR2; + SC RR3; + + if (N <= 1) return; + + l = N / 2 + 1; + r = N - 1; + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; + + if ((list2 != NULL) && (list3 != NULL)) { + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; - /* ******************************************************************* */ - /* sort an array and move along list2 and/or list to match sorted array*/ - /* ------------------------------------------------------------------- */ - template - void Muelu_az_sort(int list[], int N, int list2[], SC list3[]) { - int l, r, RR, K, j, i, flag; - int RR2; - SC RR3; - - if (N <= 1) return; - - l = N / 2 + 1; - r = N - 1; 
- l = l - 1; - RR = list[l - 1]; - K = list[l - 1]; - - if ((list2 != NULL) && (list3 != NULL)) { - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; + while (flag == 1) { + i = j; + j = j + j; - while (flag == 1) { - i = j; - j = j + j; + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; - if (j > r + 1) + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list [i - 1] = list [j - 1]; - list2[i - 1] = list2[j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } + } - list [i - 1] = RR; - list2[i - 1] = RR2; - list3[i - 1] = RR3; + list[i - 1] = RR; + list2[i - 1] = RR2; + list3[i - 1] = RR3; - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - RR3 = list3[r]; + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + RR3 = list3[r]; - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list [l - 1]; - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - K = list [l - 1]; - } + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; } - - list [0] = RR; - list2[0] = RR2; - list3[0] = RR3; } - else if (list2 != NULL) { - RR2 = list2[l - 1]; - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + list3[0] = RR3; + } else if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; - if (j > r + 1) + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 
1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list [i - 1] = list [j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } + } - list [i - 1] = RR; - list2[i - 1] = RR2; + list[i - 1] = RR; + list2[i - 1] = RR2; - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list [l - 1]; - RR2 = list2[l - 1]; - K = list [l - 1]; - } + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + K = list[l - 1]; } - - list [0] = RR; - list2[0] = RR2; } - else if (list3 != NULL) { - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + } else if (list3 != NULL) { + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; - if (j > r + 1) + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list [i - 1] = list [j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } + } - list [i - 1] = RR; - list3[i - 1] = RR3; + list[i - 1] = RR; + list3[i - 1] = RR3; - if (l == 1) { - RR = list [r]; - RR3 = list3[r]; + if (l == 1) { + RR = list[r]; + RR3 = list3[r]; - K = list[r]; - list[r ] = list[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list [l - 1]; - RR3 = list3[l - 1]; - K = list [l - 1]; - } + K = list[r]; + list[r] = list[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; } + } 
- list [0] = RR; - list3[0] = RR3; + list[0] = RR; + list3[0] = RR3; - } - else { - while (r != 0) { - j = l; - flag = 1; + } else { + while (r != 0) { + j = l; + flag = 1; - while (flag == 1) { - i = j; - j = j + j; + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; - if (j > r + 1) + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[i - 1] = list[j - 1]; - } - else { - flag = 0; - } } } + } - list[i - 1] = RR; + list[i - 1] = RR; - if (l == 1) { - RR = list [r]; + if (l == 1) { + RR = list[r]; - K = list[r]; - list[r ] = list[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[l - 1]; - K = list[l - 1]; - } + K = list[r]; + list[r] = list[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; } - - list[0] = RR; } + + list[0] = RR; } +} + +// Merge two already sorted lists into one combined sorted list. +// NOTE: lists are given as integer arrays. These integer arrays give +// locations in CoordDist[] defining the list values. That the ith value +// associated with the Candidates list is actually CoordDist[Candidates[i]]. +template +void MergeSort(std::vector& oldCandidates, size_t numOldCandidates, const std::vector& newCandidates, const std::vector& coordDist, ArrayRCP ia) { + size_t numNewCandidates = newCandidates.size(); + size_t numCandidates = numOldCandidates + numNewCandidates; + + oldCandidates.resize(numCandidates); + + int i = numOldCandidates - 1; + int j = numNewCandidates - 1; + int k = numCandidates - 1; + while ((i >= 0) || (j >= 0)) { + if (i < 0) + oldCandidates[k--] = newCandidates[j--]; + else if (j < 0) + oldCandidates[k--] = oldCandidates[i--]; + else { + int ii = oldCandidates[i]; + int jj = newCandidates[j]; - // Merge two already sorted lists into one combined sorted list. 
- // NOTE: lists are given as integer arrays. These integer arrays give - // locations in CoordDist[] defining the list values. That the ith value - // associated with the Candidates list is actually CoordDist[Candidates[i]]. - template - void MergeSort(std::vector& oldCandidates, size_t numOldCandidates, const std::vector& newCandidates, const std::vector& coordDist, ArrayRCP ia) { - size_t numNewCandidates = newCandidates.size(); - size_t numCandidates = numOldCandidates + numNewCandidates; - - oldCandidates.resize(numCandidates); - - int i = numOldCandidates - 1; - int j = numNewCandidates - 1; - int k = numCandidates - 1; - while ((i >= 0) || (j >= 0)) { - if (i < 0) oldCandidates[k--] = newCandidates[j--]; - else if (j < 0) oldCandidates[k--] = oldCandidates[i--]; - else { - int ii = oldCandidates[i]; - int jj = newCandidates[j]; - - // Must match code above. There is something arbitrary - // and crappy about the current weighting. + // Must match code above. There is something arbitrary + // and crappy about the current weighting. 
#ifdef optimal - if (-coordDist[ii] - .01*(ia[ii+1]-ia[ii]) + 1.e-10*(ii+1) < - -coordDist[jj] - .01*(ia[jj+1]-ia[jj]) + 1.e-10*(jj+1)) + if (-coordDist[ii] - .01 * (ia[ii + 1] - ia[ii]) + 1.e-10 * (ii + 1) < + -coordDist[jj] - .01 * (ia[jj + 1] - ia[jj]) + 1.e-10 * (jj + 1)) #else - if (coordDist[ii] - .0*(ia[ii+1]-ia[ii]) + 1.e-3*(ii+1) < - coordDist[jj] - .0*(ia[jj+1]-ia[jj]) + 1.e-3*(jj+1)) - // if (ii < jj) + if (coordDist[ii] - .0 * (ia[ii + 1] - ia[ii]) + 1.e-3 * (ii + 1) < + coordDist[jj] - .0 * (ia[jj + 1] - ia[jj]) + 1.e-3 * (jj + 1)) + // if (ii < jj) #endif - oldCandidates[k--] = newCandidates[j--]; - else - oldCandidates[k--] = oldCandidates[i--]; - } + oldCandidates[k--] = newCandidates[j--]; + else + oldCandidates[k--] = oldCandidates[i--]; } } +} - template - RCP Q2Q1uPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set< RCP >("A", null, "Generating factory of the matrix A"); - - RCP rcpThis = rcpFromRef(*this); - validParamList->set< RCP >("CoordinatesVelocity", rcpThis, "Generating factory of the coordinates"); - validParamList->set< RCP >("AForPat", rcpThis, "Generating factory for Apattern"); - validParamList->set< RCP >("CoordinatesPressure", rcpThis, "Generating factory of the coordinates"); - validParamList->set< RCP >("p2vMap", rcpThis, "Mapping of pressure coords to u-velocity coords"); - - validParamList->set< std::string > ("mode", "pressure", "Mode"); - validParamList->set< bool > ("phase2", false, "Use extra phase to improve pattern"); - validParamList->set< bool > ("dump status", false, "Output status"); - - validParamList->set< double > ("tau_2", sqrt(0.0015), "tau_2 parameter from the paper (used for mid points)"); - - return validParamList; - } - - template - void Q2Q1uPFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - Input(fineLevel, "A"); - - const ParameterList& pL = GetParameterList(); - bool pressureMode = (pL.get("mode") == "pressure"); - - // 
NOTE: we cannot simply do Input(fineLevel, "CoordinatePressure", as in - // valid parameter list we specified *this as the generating factory - if (fineLevel.GetLevelID()) { - if (pressureMode) { - Input(fineLevel, "CoordinatesPressure"); - } else { - Input(fineLevel, "CoordinatesVelocity"); - Input(fineLevel, "AForPat"); - Input(fineLevel, "p2vMap"); - } - } - } +template +RCP Q2Q1uPFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - template - void Q2Q1uPFactory::Build(Level& fineLevel, Level& coarseLevel) const { - return BuildP(fineLevel, coarseLevel); - } + validParamList->set >("A", null, "Generating factory of the matrix A"); - template - void Q2Q1uPFactory::BuildP(Level& fineLevel, Level& coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + RCP rcpThis = rcpFromRef(*this); + validParamList->set >("CoordinatesVelocity", rcpThis, "Generating factory of the coordinates"); + validParamList->set >("AForPat", rcpThis, "Generating factory for Apattern"); + validParamList->set >("CoordinatesPressure", rcpThis, "Generating factory of the coordinates"); + validParamList->set >("p2vMap", rcpThis, "Mapping of pressure coords to u-velocity coords"); - typedef Teuchos::ScalarTraits STS; + validParamList->set("mode", "pressure", "Mode"); + validParamList->set("phase2", false, "Use extra phase to improve pattern"); + validParamList->set("dump status", false, "Output status"); - const ParameterList& pL = GetParameterList(); - bool pressureMode = (pL.get("mode") == "pressure"); - GetOStream(Runtime0) << (pressureMode ? 
"Pressure" : "Velocity") << " mode" << std::endl; + validParamList->set("tau_2", sqrt(0.0015), "tau_2 parameter from the paper (used for mid points)"); - bool fineLevelID = fineLevel.GetLevelID(); + return validParamList; +} - RCP A = Get< RCP >(fineLevel, "A"); - Xpetra::global_size_t N = A->getRowMap()->getGlobalNumElements(); +template +void Q2Q1uPFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + Input(fineLevel, "A"); - RCP myCpts = rcp(new MyCptList(N)); - std::vector status(N, UNASSIGNED); + const ParameterList& pL = GetParameterList(); + bool pressureMode = (pL.get("mode") == "pressure"); - RCP coords; - RCP AForPat; - int NDim = -1; + // NOTE: we cannot simply do Input(fineLevel, "CoordinatePressure", as in + // valid parameter list we specified *this as the generating factory + if (fineLevel.GetLevelID()) { if (pressureMode) { - if (fineLevelID == 0) coords = fineLevel.Get< RCP > ("CoordinatesPressure", NoFactory::get()); - else coords = Get< RCP >(fineLevel, "CoordinatesPressure"); - NDim = coords->getNumVectors(); - - Array userCpts; // pressure does not reuse any CPOINTs - FindDist4Cpts(*A, *coords, userCpts, status, *myCpts, fineLevelID); - - if (pL.get("phase2")) { - // Beef up any limited pattern - PhaseTwoPattern(*A, *coords, status, *myCpts); - } - + Input(fineLevel, "CoordinatesPressure"); } else { - // Do all the coarsening/pattern stuff on amalgamated velocities. 
- // We need to guarantee that all velocity dofs are treated identically - // This means that we must amalgmate AForPat and the velocity coordinates - if (fineLevelID == 0) coords = fineLevel.Get< RCP > ("CoordinatesVelocity", NoFactory::get()); - else coords = Get< RCP >(fineLevel, "CoordinatesVelocity"); - if (fineLevelID == 0) AForPat = fineLevel.Get< RCP > ("AForPat", NoFactory::get()); - else AForPat = Get< RCP > (fineLevel, "AForPat"); - NDim = coords->getNumVectors(); + Input(fineLevel, "CoordinatesVelocity"); + Input(fineLevel, "AForPat"); + Input(fineLevel, "p2vMap"); + } + } +} - TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("PresCptsAndMids"), Exceptions::RuntimeError, - "Pressure points are not available"); +template +void Q2Q1uPFactory::Build(Level& fineLevel, Level& coarseLevel) const { + return BuildP(fineLevel, coarseLevel); +} - Array userCpts = coarseLevel.Get >("PresCptsAndMids"); - GetOStream(Runtime1) << "Found stored pressure C-points: " << userCpts.size() << " " << userCpts << std::endl; +template +void Q2Q1uPFactory::BuildP(Level& fineLevel, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - TEUCHOS_TEST_FOR_EXCEPTION(N % NDim, Exceptions::RuntimeError, "Number of velocity DOFs is odd"); - Xpetra::global_size_t NN = N/NDim; + typedef Teuchos::ScalarTraits STS; - std::vector gNodeIds(NN); - for (size_t k = 0; k < NN; k++) - gNodeIds[k] = k; + const ParameterList& pL = GetParameterList(); + bool pressureMode = (pL.get("mode") == "pressure"); + GetOStream(Runtime0) << (pressureMode ? 
"Pressure" : "Velocity") << " mode" << std::endl; - RCP nodeMap = MapFactory::Build(AForPat->getRowMap()->lib(), NN, gNodeIds, 0, AForPat->getRowMap()->getComm()); + bool fineLevelID = fineLevel.GetLevelID(); - // FIXME: remove magic number 30 - RCP amalgA = MatrixFactory::Build(nodeMap, nodeMap, 30); - RCP amalgA_crs = rcp_dynamic_cast(amalgA)->getCrsMatrix(); + RCP A = Get >(fineLevel, "A"); + Xpetra::global_size_t N = A->getRowMap()->getGlobalNumElements(); - // FIXME: this should be written similar to CoalesceDropFactory Merge - for (LO row = 0; row < as(AForPat->getRowMap()->getLocalNumElements()); row += NDim) { - GO grid = AForPat->getRowMap()->getGlobalElement(row); - GO currentId = grid/NDim; + RCP myCpts = rcp(new MyCptList(N)); + std::vector status(N, UNASSIGNED); - Teuchos::ArrayView inds; - Teuchos::ArrayView vals; - AForPat->getLocalRowView(row, inds, vals); + RCP coords; + RCP AForPat; + int NDim = -1; + if (pressureMode) { + if (fineLevelID == 0) + coords = fineLevel.Get >("CoordinatesPressure", NoFactory::get()); + else + coords = Get >(fineLevel, "CoordinatesPressure"); + NDim = coords->getNumVectors(); - size_t nnz = inds.size(); + Array userCpts; // pressure does not reuse any CPOINTs + FindDist4Cpts(*A, *coords, userCpts, status, *myCpts, fineLevelID); - // Count the number of nonzero block columns in this row - // NOTE: this assumes that blocks are dense, i.e. that if one column is - // nonzero, then all columns in the same block are nonzeros - LO realnnz = 0; - for (LO col = 0; col < Teuchos::as(nnz); col++) - if (inds[col] % NDim == 0) - realnnz++; + if (pL.get("phase2")) { + // Beef up any limited pattern + PhaseTwoPattern(*A, *coords, status, *myCpts); + } - if (realnnz == 0) - continue; + } else { + // Do all the coarsening/pattern stuff on amalgamated velocities. 
+ // We need to guarantee that all velocity dofs are treated identically + // This means that we must amalgmate AForPat and the velocity coordinates + if (fineLevelID == 0) + coords = fineLevel.Get >("CoordinatesVelocity", NoFactory::get()); + else + coords = Get >(fineLevel, "CoordinatesVelocity"); + if (fineLevelID == 0) + AForPat = fineLevel.Get >("AForPat", NoFactory::get()); + else + AForPat = Get >(fineLevel, "AForPat"); + NDim = coords->getNumVectors(); + + TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("PresCptsAndMids"), Exceptions::RuntimeError, + "Pressure points are not available"); + + Array userCpts = coarseLevel.Get >("PresCptsAndMids"); + GetOStream(Runtime1) << "Found stored pressure C-points: " << userCpts.size() << " " << userCpts << std::endl; + + TEUCHOS_TEST_FOR_EXCEPTION(N % NDim, Exceptions::RuntimeError, "Number of velocity DOFs is odd"); + Xpetra::global_size_t NN = N / NDim; + + std::vector gNodeIds(NN); + for (size_t k = 0; k < NN; k++) + gNodeIds[k] = k; + + RCP nodeMap = MapFactory::Build(AForPat->getRowMap()->lib(), NN, gNodeIds, 0, AForPat->getRowMap()->getComm()); + + // FIXME: remove magic number 30 + RCP amalgA = MatrixFactory::Build(nodeMap, nodeMap, 30); + RCP amalgA_crs = rcp_dynamic_cast(amalgA)->getCrsMatrix(); + + // FIXME: this should be written similar to CoalesceDropFactory Merge + for (LO row = 0; row < as(AForPat->getRowMap()->getLocalNumElements()); row += NDim) { + GO grid = AForPat->getRowMap()->getGlobalElement(row); + GO currentId = grid / NDim; + + Teuchos::ArrayView inds; + Teuchos::ArrayView vals; + AForPat->getLocalRowView(row, inds, vals); + + size_t nnz = inds.size(); + + // Count the number of nonzero block columns in this row + // NOTE: this assumes that blocks are dense, i.e. 
that if one column is + // nonzero, then all columns in the same block are nonzeros + LO realnnz = 0; + for (LO col = 0; col < Teuchos::as(nnz); col++) + if (inds[col] % NDim == 0) + realnnz++; + + if (realnnz == 0) + continue; - Teuchos::Array cnodeIds(realnnz, 0); - Teuchos::Array ones(realnnz, STS::one()); //Pattern has all 1's + Teuchos::Array cnodeIds(realnnz, 0); + Teuchos::Array ones(realnnz, STS::one()); // Pattern has all 1's - realnnz = 0; - for (LO col = 0; col < Teuchos::as(nnz); col++) { - if (inds[col] % NDim == 0) { - GO gcid = AForPat->getColMap()->getGlobalElement(inds[col]); - cnodeIds[realnnz++] = gcid/NDim; - } + realnnz = 0; + for (LO col = 0; col < Teuchos::as(nnz); col++) { + if (inds[col] % NDim == 0) { + GO gcid = AForPat->getColMap()->getGlobalElement(inds[col]); + cnodeIds[realnnz++] = gcid / NDim; } - amalgA_crs->insertGlobalValues(currentId, cnodeIds, ones); } - amalgA_crs->fillComplete(nodeMap, nodeMap); + amalgA_crs->insertGlobalValues(currentId, cnodeIds, ones); + } + amalgA_crs->fillComplete(nodeMap, nodeMap); - // Amalgmate the velocity coordinates - // NOTE: This assumes that the original coords vector contains duplicated (x NDim) degrees of freedom - RCP amalgCoords = Xpetra::MultiVectorFactory::Build(nodeMap, NDim); + // Amalgmate the velocity coordinates + // NOTE: This assumes that the original coords vector contains duplicated (x NDim) degrees of freedom + RCP amalgCoords = Xpetra::MultiVectorFactory::Build(nodeMap, NDim); - for (int j = 0; j < NDim; j++) { - ArrayRCP coordView = coords ->getDataNonConst(j); - ArrayRCP amalgCoordView = amalgCoords->getDataNonConst(j); - for (size_t k = 0; k < NN; k++) - amalgCoordView[k] = coordView[k*NDim]; - } + for (int j = 0; j < NDim; j++) { + ArrayRCP coordView = coords->getDataNonConst(j); + ArrayRCP amalgCoordView = amalgCoords->getDataNonConst(j); + for (size_t k = 0; k < NN; k++) + amalgCoordView[k] = coordView[k * NDim]; + } - // On the finest level, we must map userCpts (which 
corresponds to - // pressure cpts and pressure mid-points) to the velocity variables - // - // NOTE: on coarser levels the lower numbered velocity dofs correspond - // to points that are co-located with pressures and the two numberings - // are identical so no translation is needed. - if (fineLevelID == 0) { - ArrayRCP p2vMap = fineLevel.Get< ArrayRCP >("p2vMap", NoFactory::get()); - - for (int k = 0; k < userCpts.size(); k++) - userCpts[k] = p2vMap[userCpts[k]]/NDim; - } + // On the finest level, we must map userCpts (which corresponds to + // pressure cpts and pressure mid-points) to the velocity variables + // + // NOTE: on coarser levels the lower numbered velocity dofs correspond + // to points that are co-located with pressures and the two numberings + // are identical so no translation is needed. + if (fineLevelID == 0) { + ArrayRCP p2vMap = fineLevel.Get >("p2vMap", NoFactory::get()); + + for (int k = 0; k < userCpts.size(); k++) + userCpts[k] = p2vMap[userCpts[k]] / NDim; + } - GetOStream(Runtime1) << "Amalgamated velocity C-points: " << userCpts.size() << " " << userCpts << std::endl; + GetOStream(Runtime1) << "Amalgamated velocity C-points: " << userCpts.size() << " " << userCpts << std::endl; - // Now determine velocity CPOINTs for amalgamated system - RCP amalgCpts = rcp(new MyCptList(NN)); - std::vector amalgStatus(NN, UNASSIGNED); + // Now determine velocity CPOINTs for amalgamated system + RCP amalgCpts = rcp(new MyCptList(NN)); + std::vector amalgStatus(NN, UNASSIGNED); - FindDist4Cpts(*amalgA, *amalgCoords, userCpts, amalgStatus, *amalgCpts, fineLevelID); + FindDist4Cpts(*amalgA, *amalgCoords, userCpts, amalgStatus, *amalgCpts, fineLevelID); - if (pL.get("phase2")) { - // Beef up any limited pattern - PhaseTwoPattern(*amalgA, *amalgCoords, amalgStatus, *amalgCpts); - } + if (pL.get("phase2")) { + // Beef up any limited pattern + PhaseTwoPattern(*amalgA, *amalgCoords, amalgStatus, *amalgCpts); + } - // Unamalgamate data - Array& Cptlist = myCpts 
->getCList(); - Array& amalgCptlist = amalgCpts->getCList(); - std::vector& numCpts = myCpts ->getNumCpts(); - std::vector& amalgNumCpts = amalgCpts->getNumCpts(); + // Unamalgamate data + Array& Cptlist = myCpts->getCList(); + Array& amalgCptlist = amalgCpts->getCList(); + std::vector& numCpts = myCpts->getNumCpts(); + std::vector& amalgNumCpts = amalgCpts->getNumCpts(); - int p = amalgCptlist.size(); + int p = amalgCptlist.size(); - Cptlist.resize(p*NDim); - for (int k = 0; k < p; k++) { - Cptlist[k*NDim] = amalgCptlist[k] * NDim; - for (int j = 1; j < NDim; j++) - Cptlist[k*NDim+j] = Cptlist[k*NDim] + j; - } + Cptlist.resize(p * NDim); + for (int k = 0; k < p; k++) { + Cptlist[k * NDim] = amalgCptlist[k] * NDim; + for (int j = 1; j < NDim; j++) + Cptlist[k * NDim + j] = Cptlist[k * NDim] + j; + } - for (Xpetra::global_size_t i = 0; i < NN; i++) { - for (int j = 0; j < NDim; j++) { - status [N-1-(i*NDim+j)] = amalgStatus [NN-1-(i)]; - numCpts[i*NDim+j] = amalgNumCpts[i]; - for (int k = 0; k < amalgNumCpts[i]; k++) - (*myCpts)(i*NDim+j)[k] = (*amalgCpts)(i)[k]*NDim + j; - } + for (Xpetra::global_size_t i = 0; i < NN; i++) { + for (int j = 0; j < NDim; j++) { + status[N - 1 - (i * NDim + j)] = amalgStatus[NN - 1 - (i)]; + numCpts[i * NDim + j] = amalgNumCpts[i]; + for (int k = 0; k < amalgNumCpts[i]; k++) + (*myCpts)(i * NDim + j)[k] = (*amalgCpts)(i)[k] * NDim + j; } } + } - const bool doStatusOutput = pL.get("dump status"); - if (doStatusOutput) { - const Array& Cptlist = myCpts->getCList(); - std::vector& numCpts = myCpts->getNumCpts(); - - std::string depPrefix = std::string("dep0-l") + toString(fineLevel.GetLevelID()) + (pressureMode ? 
"-p-" : "-v-"); - - std::vector depStatus(N); - // Graph is unamalgamted, so we need to skip some CPOINTs as they are - // essentially duplicated for different velocities - for (int k = 0; k < Cptlist.size(); k += NDim) { + const bool doStatusOutput = pL.get("dump status"); + if (doStatusOutput) { + const Array& Cptlist = myCpts->getCList(); + std::vector& numCpts = myCpts->getNumCpts(); - for (Xpetra::global_size_t i = 0; i < N; i++) { - bool isPresent = false; - for (int j = 0; j < numCpts[i]; j++) - if ((*myCpts)(i)[j] == Cptlist[k]) - isPresent = true; - depStatus[i] = (isPresent ? FPOINT : UNASSIGNED); - } - depStatus[Cptlist[k]] = CPOINT; + std::string depPrefix = std::string("dep0-l") + toString(fineLevel.GetLevelID()) + (pressureMode ? "-p-" : "-v-"); - DumpStatus(depPrefix + toString(k), depStatus, NDim, false); + std::vector depStatus(N); + // Graph is unamalgamted, so we need to skip some CPOINTs as they are + // essentially duplicated for different velocities + for (int k = 0; k < Cptlist.size(); k += NDim) { + for (Xpetra::global_size_t i = 0; i < N; i++) { + bool isPresent = false; + for (int j = 0; j < numCpts[i]; j++) + if ((*myCpts)(i)[j] == Cptlist[k]) + isPresent = true; + depStatus[i] = (isPresent ? 
FPOINT : UNASSIGNED); } + depStatus[Cptlist[k]] = CPOINT; + + DumpStatus(depPrefix + toString(k), depStatus, NDim, false); } + } - RCP P; - // FIXME :hardwired hack, pressure gids must not overlap with velocity gids - if (pressureMode) CptDepends2Pattern(*A, *myCpts, P, 999999); - else CptDepends2Pattern(*AForPat, *myCpts, P, 0); + RCP P; + // FIXME :hardwired hack, pressure gids must not overlap with velocity gids + if (pressureMode) + CptDepends2Pattern(*A, *myCpts, P, 999999); + else + CptDepends2Pattern(*AForPat, *myCpts, P, 0); #if 0 if (pressureMode) { @@ -776,1080 +768,1079 @@ namespace MueLu { } #endif - // Construct coarse map - RCP coarseMap = P->getDomainMap(); + // Construct coarse map + RCP coarseMap = P->getDomainMap(); - // Construct coarse nullspace - RCP coarseNullspace = MultiVectorFactory::Build(coarseMap, 1); - coarseNullspace->putScalar(STS::one()); + // Construct coarse nullspace + RCP coarseNullspace = MultiVectorFactory::Build(coarseMap, 1); + coarseNullspace->putScalar(STS::one()); - // Construct coarse coordinates - const Array& Cptlist = myCpts->getCList(); - RCP coarseCoords = MultiVectorFactory::Build(coarseMap, NDim); - for (int k = 0; k < NDim; k++) { - ArrayRCP coords1D = coords ->getData(k); - ArrayRCP coarseCoords1D = coarseCoords->getDataNonConst(k); + // Construct coarse coordinates + const Array& Cptlist = myCpts->getCList(); + RCP coarseCoords = MultiVectorFactory::Build(coarseMap, NDim); + for (int k = 0; k < NDim; k++) { + ArrayRCP coords1D = coords->getData(k); + ArrayRCP coarseCoords1D = coarseCoords->getDataNonConst(k); - for (int i = 0; i < coarseCoords1D.size(); i++) - coarseCoords1D[i] = coords1D[Cptlist[i]]; - } - - // Level Set - Set(coarseLevel, "P", P); - Set(fineLevel, "CoarseMap", coarseMap); - if (pressureMode) { - Set(coarseLevel, "CoordinatesPressure", coarseCoords); - - } else { - Set(coarseLevel, "CoordinatesVelocity", coarseCoords); - // FIXME: why does coarse pattern matrix look like? 
- RCP AP = Xpetra::MatrixMatrix::Multiply(*AForPat, false, *P, false, GetOStream(Statistics2), true, true); - RCP RAP = Xpetra::MatrixMatrix::Multiply(*P, true, *AP, false, GetOStream(Statistics2), true, true); - Set(coarseLevel, "AForPat", RAP); - } - Set(coarseLevel, "Nullspace", coarseNullspace); - - // Compute data for velocity - if (pressureMode) { - Array velCptlist = Cptlist; - FindMidPoints(*A, *coords, velCptlist, *myCpts); - coarseLevel.Set >("PresCptsAndMids", velCptlist, NoFactory::get()); - } + for (int i = 0; i < coarseCoords1D.size(); i++) + coarseCoords1D[i] = coords1D[Cptlist[i]]; } - template - void PrintVector(const std::vector& v, const std::string& name, int n = -1) { - std::cout << "======================" << std::endl; - if (!name.empty()) - std::cout << "=== " << name << " ===" << std::endl; - if (n == -1) - n = v.size(); - for (int i = 0; i < n; i++) - std::cout << i << ": " << v[i] << std::endl; - std::cout << "======================" << std::endl; + // Level Set + Set(coarseLevel, "P", P); + Set(fineLevel, "CoarseMap", coarseMap); + if (pressureMode) { + Set(coarseLevel, "CoordinatesPressure", coarseCoords); + + } else { + Set(coarseLevel, "CoordinatesVelocity", coarseCoords); + // FIXME: why does coarse pattern matrix look like? + RCP AP = Xpetra::MatrixMatrix::Multiply(*AForPat, false, *P, false, GetOStream(Statistics2), true, true); + RCP RAP = Xpetra::MatrixMatrix::Multiply(*P, true, *AP, false, GetOStream(Statistics2), true, true); + Set(coarseLevel, "AForPat", RAP); } + Set(coarseLevel, "Nullspace", coarseNullspace); - // distance2 returns _squared_ distance - // i.e. 
the final sqrt calculation is not done - template - SC distance2(const ArrayRCP >& coords1D, int i, int j) { - const int NDim = coords1D.size(); - - SC d = Teuchos::ScalarTraits::zero(); - for (int k = 0; k < NDim; k++) { - SC dtmp = coords1D[k][j] - coords1D[k][i]; - d += dtmp*dtmp; - } - - return d; + // Compute data for velocity + if (pressureMode) { + Array velCptlist = Cptlist; + FindMidPoints(*A, *coords, velCptlist, *myCpts); + coarseLevel.Set >("PresCptsAndMids", velCptlist, NoFactory::get()); + } +} + +template +void PrintVector(const std::vector& v, const std::string& name, int n = -1) { + std::cout << "======================" << std::endl; + if (!name.empty()) + std::cout << "=== " << name << " ===" << std::endl; + if (n == -1) + n = v.size(); + for (int i = 0; i < n; i++) + std::cout << i << ": " << v[i] << std::endl; + std::cout << "======================" << std::endl; +} + +// distance2 returns _squared_ distance +// i.e. the final sqrt calculation is not done +template +SC distance2(const ArrayRCP >& coords1D, int i, int j) { + const int NDim = coords1D.size(); + + SC d = Teuchos::ScalarTraits::zero(); + for (int k = 0; k < NDim; k++) { + SC dtmp = coords1D[k][j] - coords1D[k][i]; + d += dtmp * dtmp; } - template - std::string i2s(int i) { - std::ostringstream os; - if (i >= N) { - os << i; - } else { - if (i < 10) os << "0"; - if (i < 100) os << "0"; - if (i < 1000) os << "0"; - if (i < 10000) os << "0"; - if (i < 100000) os << "0"; - if (i < 1000000) os << "0"; - if (i < 10000000) os << "0"; - if (i < 100000000) os << "0"; - if (i < 1000000000) os << "0"; - } + return d; +} + +template +std::string i2s(int i) { + std::ostringstream os; + if (i >= N) { os << i; - return os.str(); + } else { + if (i < 10) os << "0"; + if (i < 100) os << "0"; + if (i < 1000) os << "0"; + if (i < 10000) os << "0"; + if (i < 100000) os << "0"; + if (i < 1000000) os << "0"; + if (i < 10000000) os << "0"; + if (i < 100000000) os << "0"; + if (i < 1000000000) os << "0"; + 
} + os << i; + return os.str(); +} + +// Initial fill Cptlist with a set of distance 4 points (during phase one). +// Additional Cpts are then determined looking for large gaps between the +// phase one Cpts. Candidate additional Cpts corresponds to phase one FPOINTs +// that have only 1 or 2 Cpts within a graph distance of 3 and are generally +// far (via graph or coordinate distances) from existing Cpts. We also define +// a sparsity pattern. An initial pattern is computed which basically +// includes all FPOINTs within a distance 3 from a Cpt. Additional entries +// are added to the initial sparsity pattern via PhaseTwoPattern(). These +// points correspond to Fpoints that only interpolate from 2 or less Cpts, +// are also far from existing Cpoints, and where the orientation of the +// interpolation Cpts is skewed to one side of the Fpoint (see +// PhaseTwoPattern for more details). +// +// NOTE: inefficiencies +// The main inefficiency is the sorting of the CandidateList. This is +// expensive and it is not clear how important it really is. My guess is that +// we could actually leave the CandidateList unsorted and things would work +// almost as well. We could also do some kind of hybrid where we sort the +// first 100 candidates. Use this data to create something like 10 bins and +// for any further candidates just put them in the right bin. When we need to +// choose a new Cpt from the CandidateList, just pick any one from the lowest +// populated bin. There are also potential inefficiences with respect to +// malloc(). I doubt that these are a big problem, but one could allocate +// some workspaces ahead of time to avoid the constant malloc/free cycle in +// CompDistances(). 
+template +void Q2Q1uPFactory:: + FindDist4Cpts(const Matrix& A, const MultiVector& coords, const Array& userCpts, std::vector& status, MyCptList& myCpts, int levelID) const { + int NDim = coords.getNumVectors(); + size_t numRows = A.getLocalNumRows(); + + ArrayRCP ia; + ArrayRCP ja; + CreateCrsPointers(A, ia, ja); + + ArrayRCP > coords1D(NDim); + for (int k = 0; k < NDim; k++) + coords1D[k] = coords.getData(k); + + typedef Teuchos::ScalarTraits STS; + SC zero = STS::zero(); + + // Initialize coordDist to some large value. + // coordDist is an attempt to measure an average distance from a given + // point to all the CPOINTs that it depends on. The averages are harmonic, so + // basically the initial large coordDist will be averaged away with the 1st + // harmonic average. The initial big value computed here is + // (Max(x)-Min(x))^2 + (Max(y)-Min(y))^2 + (Max(z)-Min(z))^2 + SC big = zero; + for (int i = 0; i < NDim; i++) { + SC dmin = *std::min_element(coords1D[i].begin(), coords1D[i].end()); + SC dmax = *std::max_element(coords1D[i].begin(), coords1D[i].end()); + + big += ((dmax - dmin) * (dmax - dmin)); + } + // MATCH_MATLAB + std::vector coordDist(numRows, 10000 * big); + + const ParameterList& pL = GetParameterList(); + const bool doStatusOutput = pL.get("dump status"); + const bool pressureMode = (pL.get("mode") == "pressure"); + + // Set all Dirichlet points as Fpoints FIXME: why FPOINTs? + // However, if a Dirichlet point is in userCpts, it will be added to the + // Cpt list later + for (size_t i = 0; i < numRows; i++) + if (ia[i + 1] - ia[i] == 1) + status[i] = FPOINT; + + // userCpts have already been fixed to be CPOINTs so we want to first mark + // them appropriately, and put them first in the CPOINT list, but still go + // through loops below to update distances and FPOINTs. Initialization is + // done here so that these points do not end up with more than 1 nnz in the + // associated sparsity pattern row. 
+ TEUCHOS_TEST_FOR_EXCEPTION(myCpts.getCList().size(), Exceptions::RuntimeError, + "myCpts in FindDist4Points must be uninitialized"); + Array& Cptlist = myCpts.getCList(); + for (int i = 0; i < userCpts.size(); i++) { + status[userCpts[i]] = CPOINT_U; + Cptlist.push_back(userCpts[i]); } + std::string st = std::string("status-l") + toString(levelID) + (pressureMode ? "-p-" : "-v-"); + int dumpCount = 0; + if (doStatusOutput) { + DumpCoords("coord-l" + toString(levelID) + (pressureMode ? "-p" : "-v"), coords); + DumpStatus(st + i2s(dumpCount++) + "-A", status, NDim); + } - // Initial fill Cptlist with a set of distance 4 points (during phase one). - // Additional Cpts are then determined looking for large gaps between the - // phase one Cpts. Candidate additional Cpts corresponds to phase one FPOINTs - // that have only 1 or 2 Cpts within a graph distance of 3 and are generally - // far (via graph or coordinate distances) from existing Cpts. We also define - // a sparsity pattern. An initial pattern is computed which basically - // includes all FPOINTs within a distance 3 from a Cpt. Additional entries - // are added to the initial sparsity pattern via PhaseTwoPattern(). These - // points correspond to Fpoints that only interpolate from 2 or less Cpts, - // are also far from existing Cpoints, and where the orientation of the - // interpolation Cpts is skewed to one side of the Fpoint (see - // PhaseTwoPattern for more details). - // - // NOTE: inefficiencies - // The main inefficiency is the sorting of the CandidateList. This is - // expensive and it is not clear how important it really is. My guess is that - // we could actually leave the CandidateList unsorted and things would work - // almost as well. We could also do some kind of hybrid where we sort the - // first 100 candidates. Use this data to create something like 10 bins and - // for any further candidates just put them in the right bin. 
When we need to - // choose a new Cpt from the CandidateList, just pick any one from the lowest - // populated bin. There are also potential inefficiences with respect to - // malloc(). I doubt that these are a big problem, but one could allocate - // some workspaces ahead of time to avoid the constant malloc/free cycle in - // CompDistances(). - template - void Q2Q1uPFactory:: - FindDist4Cpts(const Matrix& A, const MultiVector& coords, const Array& userCpts, std::vector& status, MyCptList& myCpts, int levelID) const { - int NDim = coords.getNumVectors(); - size_t numRows = A.getLocalNumRows(); - - ArrayRCP ia; - ArrayRCP ja; - CreateCrsPointers(A, ia, ja); - - ArrayRCP > coords1D(NDim); - for (int k = 0; k < NDim; k++) - coords1D[k] = coords.getData(k); - - typedef Teuchos::ScalarTraits STS; - SC zero = STS::zero(); - - // Initialize coordDist to some large value. - // coordDist is an attempt to measure an average distance from a given - // point to all the CPOINTs that it depends on. The averages are harmonic, so - // basically the initial large coordDist will be averaged away with the 1st - // harmonic average. The initial big value computed here is - // (Max(x)-Min(x))^2 + (Max(y)-Min(y))^2 + (Max(z)-Min(z))^2 - SC big = zero; - for (int i = 0; i < NDim; i++) { - SC dmin = *std::min_element(coords1D[i].begin(), coords1D[i].end()); - SC dmax = *std::max_element(coords1D[i].begin(), coords1D[i].end()); - - big += ((dmax - dmin)*(dmax - dmin)); + std::vector& numCpts = myCpts.getNumCpts(); + + // Determine CPOINTs + int userCcount = 0; + size_t numCandidates = 0; + std::vector distIncrement(numRows, 0); + std::vector cumGraphDist(numRows, 0.); + std::vector candidateList(numRows, 0); + size_t i = 0; + while (i < numRows) { + LO newCpt = -1; + + // Check userCpts list + // + // These were essentially already determined to be CPOINTs by some other + // functions. 
We want to then add them one-by-one, updating distances, + // and look to see if further CPOINTs can be added once we have finished + // all of the userCpts + if (userCcount < userCpts.size()) + newCpt = userCpts[userCcount++]; + + // Check for possible CPOINT on candidate list + // FIXME: Could CANDIDATE list contain non-CANDIDATE statuses? + while ((newCpt == -1) && (numCandidates > 0)) { + if (status[candidateList[numCandidates - 1]] <= CANDIDATE) { + newCpt = candidateList[numCandidates - 1]; + status[newCpt] = CPOINT; + + if (doStatusOutput) + DumpStatus(st + i2s(dumpCount++) + "-B", status, NDim); + } + numCandidates--; + // FIXME: Why is there no i++ here? } - // MATCH_MATLAB - std::vector coordDist(numRows, 10000*big); - const ParameterList& pL = GetParameterList(); - const bool doStatusOutput = pL.get("dump status"); - const bool pressureMode = (pL.get("mode") == "pressure"); + // If no new CPOINT identified in candidate list, check the unassigned list + while ((newCpt == -1) && (i < numRows)) { + if (status[i] == UNASSIGNED) { + newCpt = i; + status[newCpt] = CPOINT; - // Set all Dirichlet points as Fpoints FIXME: why FPOINTs? - // However, if a Dirichlet point is in userCpts, it will be added to the - // Cpt list later - for (size_t i = 0; i < numRows; i++) - if (ia[i+1] - ia[i] == 1) - status[i] = FPOINT; - - // userCpts have already been fixed to be CPOINTs so we want to first mark - // them appropriately, and put them first in the CPOINT list, but still go - // through loops below to update distances and FPOINTs. Initialization is - // done here so that these points do not end up with more than 1 nnz in the - // associated sparsity pattern row. 
- TEUCHOS_TEST_FOR_EXCEPTION(myCpts.getCList().size(), Exceptions::RuntimeError, - "myCpts in FindDist4Points must be uninitialized"); - Array& Cptlist = myCpts.getCList(); - for (int i = 0; i < userCpts.size(); i++) { - status[userCpts[i]] = CPOINT_U; - Cptlist.push_back(userCpts[i]); - } - - std::string st = std::string("status-l") + toString(levelID) + (pressureMode ? "-p-" : "-v-"); - int dumpCount = 0; - if (doStatusOutput) { - DumpCoords("coord-l" + toString(levelID) + (pressureMode ? "-p" : "-v"), coords); - DumpStatus(st + i2s(dumpCount++) + "-A", status, NDim); + if (doStatusOutput) + DumpStatus(st + i2s(dumpCount++) + "-C", status, NDim); + } + i++; } - std::vector& numCpts = myCpts.getNumCpts(); - - // Determine CPOINTs - int userCcount = 0; - size_t numCandidates = 0; - std::vector distIncrement(numRows, 0); - std::vector cumGraphDist (numRows, 0.); - std::vector candidateList(numRows, 0); - size_t i = 0; - while (i < numRows) { - LO newCpt = -1; - - // Check userCpts list - // - // These were essentially already determined to be CPOINTs by some other - // functions. We want to then add them one-by-one, updating distances, - // and look to see if further CPOINTs can be added once we have finished - // all of the userCpts - if (userCcount < userCpts.size()) - newCpt = userCpts[userCcount++]; - - // Check for possible CPOINT on candidate list - // FIXME: Could CANDIDATE list contain non-CANDIDATE statuses? - while ((newCpt == -1) && (numCandidates > 0)) { - if (status[candidateList[numCandidates-1]] <= CANDIDATE) { - newCpt = candidateList[numCandidates-1]; - status[newCpt] = CPOINT; - - if (doStatusOutput) - DumpStatus(st + i2s(dumpCount++) + "-B", status, NDim); - } - numCandidates--; - // FIXME: Why is there no i++ here? + // Update distances and the status of neighbors neighbors to reflect a + // newly found CPOINT + if (newCpt != -1) { + std::vector dist1, dist2, dist3, dist4; + // FIXME: Should CompDistances automatically exclude other CPOINTs? 
+ CompDistances(A, newCpt, 4, dist1, dist2, dist3, dist4); + + // Make sure that the only CPOINT in dist3 is newCpt. All others should be excluded. + int numDist3 = 0; + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; + if (status[j] < CPOINT) + dist3[numDist3++] = j; } - - // If no new CPOINT identified in candidate list, check the unassigned list - while ((newCpt == -1) && (i < numRows)) { - if (status[i] == UNASSIGNED) { - newCpt = i; - status[newCpt] = CPOINT; - - if (doStatusOutput) - DumpStatus(st + i2s(dumpCount++) + "-C", status, NDim); + dist3.resize(numDist3); + dist3.push_back(newCpt); + + // UNASSIGNED or CANDIDATE distance 3 and closer neighbors are put into FPOINT list + // FIXME: why not put TWOTIMER there too? + bool dumpStatus = false; + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; + if (status[j] == UNASSIGNED || status[j] == CANDIDATE) { + status[j] = FPOINT; + dumpStatus = true; } - i++; } + if (dumpStatus && doStatusOutput) + DumpStatus(st + i2s(dumpCount++) + "-D", status, NDim); - // Update distances and the status of neighbors neighbors to reflect a - // newly found CPOINT - if (newCpt != -1) { - std::vector dist1, dist2, dist3, dist4; - // FIXME: Should CompDistances automatically exclude other CPOINTs? - CompDistances(A, newCpt, 4, dist1, dist2, dist3, dist4); - - // Make sure that the only CPOINT in dist3 is newCpt. All others should be excluded. - int numDist3 = 0; - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; - if (status[j] < CPOINT) - dist3[numDist3++] = j; - } - dist3.resize(numDist3); - dist3.push_back(newCpt); - - // UNASSIGNED or CANDIDATE distance 3 and closer neighbors are put into FPOINT list - // FIXME: why not put TWOTIMER there too? 
- bool dumpStatus = false; - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; - if (status[j] == UNASSIGNED || status[j] == CANDIDATE) { - status[j] = FPOINT; - dumpStatus = true; - } - } - if (dumpStatus && doStatusOutput) - DumpStatus(st + i2s(dumpCount++) + "-D", status, NDim); + // Update myCpts() to reflect dependence of neighbors on newCpt + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; - // Update myCpts() to reflect dependence of neighbors on newCpt - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; + TEUCHOS_TEST_FOR_EXCEPTION(numCpts[j] >= myCpts.getNnzPerRow(), Exceptions::RuntimeError, + "Increase max number of C points per row"); + myCpts(j)[numCpts[j]++] = newCpt; + } - TEUCHOS_TEST_FOR_EXCEPTION(numCpts[j] >= myCpts.getNnzPerRow(), Exceptions::RuntimeError, - "Increase max number of C points per row"); - myCpts(j)[numCpts[j]++] = newCpt; - } + // Update cumGraphDist + // NOTE: order matters as dist2 is contained within dist3, etc. + // FIXME: Do dist2 and dist1 contain CPOINTs? + for (size_t k = 0; k < dist3.size(); k++) distIncrement[dist3[k]] = 3; + for (size_t k = 0; k < dist2.size(); k++) distIncrement[dist2[k]] = 2; + for (size_t k = 0; k < dist1.size(); k++) distIncrement[dist1[k]] = 1; + distIncrement[newCpt] = 0; + + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; + // MATCH_MATLAB: (numCpts[j]-1) is to match Matlab, where numCpts is updated after distance calculation + cumGraphDist[j] = (cumGraphDist[j] * (numCpts[j] - 1) + distIncrement[j]) / numCpts[j]; + } + cumGraphDist[newCpt] = 0; - // Update cumGraphDist - // NOTE: order matters as dist2 is contained within dist3, etc. - // FIXME: Do dist2 and dist1 contain CPOINTs? 
- for (size_t k = 0; k < dist3.size(); k++) distIncrement[dist3[k]] = 3; - for (size_t k = 0; k < dist2.size(); k++) distIncrement[dist2[k]] = 2; - for (size_t k = 0; k < dist1.size(); k++) distIncrement[dist1[k]] = 1; - distIncrement[newCpt] = 0; - - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; - // MATCH_MATLAB: (numCpts[j]-1) is to match Matlab, where numCpts is updated after distance calculation - cumGraphDist[j] = (cumGraphDist[j]*(numCpts[j]-1) + distIncrement[j])/numCpts[j]; - } - cumGraphDist[newCpt] = 0; - - // Compute coordinate distance to CPOINT - // - // Distance of CANDIDATEs to CPOINTs will be used to determine the next - // chosen CPOINT from the candidate list. Distances are also used to - // decide where new CPOINTs should be added. - for (size_t k = 0; k < dist4.size(); k++) { - LO j = dist4[k]; - - SC distance = distance2(coords1D, newCpt, j); - - // Harmonic average new distance with old distance - // There should really be a '2' in front of this expression. This - // is actually a bug in the code. However, if I put a '2', I don't - // get the perfect coarsening for a uniform mesh ... so I'm leaving - // if for now without the 2. - // MATCH_MATLAB - coordDist[j] = 2.0*(coordDist[j]*distance) / (coordDist[j] + distance); + // Compute coordinate distance to CPOINT + // + // Distance of CANDIDATEs to CPOINTs will be used to determine the next + // chosen CPOINT from the candidate list. Distances are also used to + // decide where new CPOINTs should be added. + for (size_t k = 0; k < dist4.size(); k++) { + LO j = dist4[k]; + + SC distance = distance2(coords1D, newCpt, j); + + // Harmonic average new distance with old distance + // There should really be a '2' in front of this expression. This + // is actually a bug in the code. However, if I put a '2', I don't + // get the perfect coarsening for a uniform mesh ... so I'm leaving + // if for now without the 2. 
+ // MATCH_MATLAB + coordDist[j] = 2.0 * (coordDist[j] * distance) / (coordDist[j] + distance); #if 0 SC kkk = 10.; if (coordDist[j] > distance) coordDist[j] = (kkk*coordDist[j]*distance)/(coordDist[j]*(kkk-1)+ distance); coordDist[j] = (kkk*coordDist[j]*distance)/(coordDist[j] + distance*(kkk-1)); #endif - } + } - // Mark all unassigned dist4 points as CANDIDATE and compress - // dist4 so that it only contains entries for the candidate list. - size_t numNewCandidates = 0; - dumpStatus = false; - for (size_t k = 0; k < dist4.size(); k++) { - LO j = dist4[k]; - - // NOTE: numNewCandidates is always <= k, so we don't overwrite the - // dist4 before reading - if (status[j] == CANDIDATE) { - // Mark as already being assigned again to candidate list so that - // entry in old 'sorted' candidate list can be removed and a new - // 'unsorted' entry can be created. This new entry will later be - // sorted reflecting the new coordinate distance. - status[j] = TWOTIMER; - dist4[numNewCandidates++] = j; - dumpStatus = true; - - } else if (status[j] == UNASSIGNED) { - status[j] = CANDIDATE; - dist4[numNewCandidates++] = j; - dumpStatus = true; - } + // Mark all unassigned dist4 points as CANDIDATE and compress + // dist4 so that it only contains entries for the candidate list. + size_t numNewCandidates = 0; + dumpStatus = false; + for (size_t k = 0; k < dist4.size(); k++) { + LO j = dist4[k]; + + // NOTE: numNewCandidates is always <= k, so we don't overwrite the + // dist4 before reading + if (status[j] == CANDIDATE) { + // Mark as already being assigned again to candidate list so that + // entry in old 'sorted' candidate list can be removed and a new + // 'unsorted' entry can be created. This new entry will later be + // sorted reflecting the new coordinate distance. 
+ status[j] = TWOTIMER; + dist4[numNewCandidates++] = j; + dumpStatus = true; + + } else if (status[j] == UNASSIGNED) { + status[j] = CANDIDATE; + dist4[numNewCandidates++] = j; + dumpStatus = true; } - dist4.resize(numNewCandidates); - if (dumpStatus && doStatusOutput) - DumpStatus(st + i2s(dumpCount++) + "-E", status, pressureMode); - - // Now remove all TWOTIMERs from the old candidate list - size_t numOldCandidates = 0; - dumpStatus = false; - for (size_t k = 0; k < numCandidates; k++) { - LO j = candidateList[k]; - - if (status[j] == CANDIDATE) { candidateList[numOldCandidates++] = j; } - if (status[j] == TWOTIMER ) { - status[j] = CANDIDATE; - dumpStatus = true; - } + } + dist4.resize(numNewCandidates); + if (dumpStatus && doStatusOutput) + DumpStatus(st + i2s(dumpCount++) + "-E", status, pressureMode); + + // Now remove all TWOTIMERs from the old candidate list + size_t numOldCandidates = 0; + dumpStatus = false; + for (size_t k = 0; k < numCandidates; k++) { + LO j = candidateList[k]; + + if (status[j] == CANDIDATE) { + candidateList[numOldCandidates++] = j; } - if (dumpStatus && doStatusOutput) - DumpStatus(st + i2s(dumpCount++) + "-F", status, NDim); - - // Sort the candidates based on distances (breaking ties via degrees, - // encouraging points near boundary). First, we order new candidates - // and then we merge together two sorted lists. - // - // NOTE: to match matlab (and break ties), I added the 1.e-10 term - std::vector ddtemp(numNewCandidates); - for (size_t k = 0; k < numNewCandidates; k++) { - LO j = dist4[k]; - // MATCH_MATLAB + if (status[j] == TWOTIMER) { + status[j] = CANDIDATE; + dumpStatus = true; + } + } + if (dumpStatus && doStatusOutput) + DumpStatus(st + i2s(dumpCount++) + "-F", status, NDim); + + // Sort the candidates based on distances (breaking ties via degrees, + // encouraging points near boundary). First, we order new candidates + // and then we merge together two sorted lists. 
+ // + // NOTE: to match matlab (and break ties), I added the 1.e-10 term + std::vector ddtemp(numNewCandidates); + for (size_t k = 0; k < numNewCandidates; k++) { + LO j = dist4[k]; + // MATCH_MATLAB #ifdef optimal - // This one is better, but we are trying to replicate Matlab now - ddtemp[k] = -coordDist[j] - .01*(ia[j+1]-ia[j]) + 1e-10*(j+1); + // This one is better, but we are trying to replicate Matlab now + ddtemp[k] = -coordDist[j] - .01 * (ia[j + 1] - ia[j]) + 1e-10 * (j + 1); #else - ddtemp[k] = +coordDist[j] - 0.0*(ia[j+1]-ia[j]) + 1e-3*(j+1); + ddtemp[k] = +coordDist[j] - 0.0 * (ia[j + 1] - ia[j]) + 1e-3 * (j + 1); #endif - } - Muelu_az_dsort2(ddtemp, dist4); - - MergeSort(candidateList, numOldCandidates, dist4, coordDist, ia); - numCandidates = numOldCandidates + numNewCandidates; } + Muelu_az_dsort2(ddtemp, dist4); + + MergeSort(candidateList, numOldCandidates, dist4, coordDist, ia); + numCandidates = numOldCandidates + numNewCandidates; } + } - // Add additional CPOINTs based on some score which includes the number of - // CPOINTs that an FPOINT depends on as well as its distance (both graph - // and coordinate) to nearby CPOINTs. - const double graphWeight = 0.8; - const double orientWeight = 0.5; - for (int numCDepends = 1; numCDepends <= 2; numCDepends++) { - numCandidates = 0; - - std::vector candidates; - for (i = 0; i < numRows; i++) - if (status[i] < CPOINT && numCpts[i] == numCDepends) { - candidates.push_back(i); - numCandidates++; - } + // Add additional CPOINTs based on some score which includes the number of + // CPOINTs that an FPOINT depends on as well as its distance (both graph + // and coordinate) to nearby CPOINTs. 
+ const double graphWeight = 0.8; + const double orientWeight = 0.5; + for (int numCDepends = 1; numCDepends <= 2; numCDepends++) { + numCandidates = 0; - if (numCandidates != 0) { - // Sort FPOINTs based on distance to CPOINTs and orientation - double maxGraphDist = -1e20; - double maxCoordDist = -1e20; - for (size_t p = 0; p < numCandidates; p++) { - LO j = candidates[p]; + std::vector candidates; + for (i = 0; i < numRows; i++) + if (status[i] < CPOINT && numCpts[i] == numCDepends) { + candidates.push_back(i); + numCandidates++; + } - maxGraphDist = std::max(maxGraphDist, cumGraphDist[j]); - maxCoordDist = std::max(maxCoordDist, coordDist[j]); - } + if (numCandidates != 0) { + // Sort FPOINTs based on distance to CPOINTs and orientation + double maxGraphDist = -1e20; + double maxCoordDist = -1e20; + for (size_t p = 0; p < numCandidates; p++) { + LO j = candidates[p]; - std::vector score (numCandidates); - std::vector orientation(numCandidates); - for (size_t p = 0; p < numCandidates; p++) { - LO j = candidates[p]; + maxGraphDist = std::max(maxGraphDist, cumGraphDist[j]); + maxCoordDist = std::max(maxCoordDist, coordDist[j]); + } - double graphScore = cumGraphDist[j] / maxGraphDist; - double coordScore = coordDist [j] / maxCoordDist; - // MATCH_MATLAB - score[p] = -(graphWeight*graphScore + (1-graphWeight)*coordScore + 1e-6*(j+1)); - - if (numCDepends == 2) { - // Orientation of -1 means that we have two CPOINTs on opposite - // sides of the FPOINT. Orientation of 0 implies that things are - // orthogonal. 
orientation of 1 implies that Cpoints are on the - // same side - SC norm = zero, vec1[3], vec2[3]; - for (int k = 0; k < NDim; k++) { - vec1[k] = coords1D[k][j] - coords1D[k][myCpts(j)[0]]; - norm += vec1[k]*vec1[k]; - } - norm = sqrt(norm); - for (int k = 0; k < NDim; k++) - vec1[k] /= norm; - - norm = zero; - for (int k = 0; k < NDim; k++) { - vec2[k] = coords1D[k][j] - coords1D[k][myCpts(j)[1]]; - norm += vec2[k]*vec2[k]; - } - norm = sqrt(norm); - for (int k = 0; k < NDim; k++) - vec2[k] /= norm; - - orientation[p] = zero; - for (int k = 0; k < NDim; k++) - orientation[p] += vec1[k]*vec2[k]; - - score[p] = -(orientWeight*orientation[p] - (1-orientWeight)*score[p]); + std::vector score(numCandidates); + std::vector orientation(numCandidates); + for (size_t p = 0; p < numCandidates; p++) { + LO j = candidates[p]; - } else { - orientation[p] = 1.0; + double graphScore = cumGraphDist[j] / maxGraphDist; + double coordScore = coordDist[j] / maxCoordDist; + // MATCH_MATLAB + score[p] = -(graphWeight * graphScore + (1 - graphWeight) * coordScore + 1e-6 * (j + 1)); + + if (numCDepends == 2) { + // Orientation of -1 means that we have two CPOINTs on opposite + // sides of the FPOINT. Orientation of 0 implies that things are + // orthogonal. 
orientation of 1 implies that Cpoints are on the + // same side + SC norm = zero, vec1[3], vec2[3]; + for (int k = 0; k < NDim; k++) { + vec1[k] = coords1D[k][j] - coords1D[k][myCpts(j)[0]]; + norm += vec1[k] * vec1[k]; } - } + norm = sqrt(norm); + for (int k = 0; k < NDim; k++) + vec1[k] /= norm; - std::vector index(numCandidates); - for (size_t p = 0; p < numCandidates; p++) - index[p] = p; - Muelu_az_dsort2(score, index); - - for (size_t p = 0; p < numCandidates; p++) { - int newCpt = candidates[index[p]]; - - if (numCpts[newCpt] == numCDepends && - cumGraphDist[newCpt] >= 2.6 && - orientation[index[p]] > -0.2) { - status [newCpt] = CPOINT; - numCpts[newCpt] = 1; - myCpts(newCpt)[0] = newCpt; - - if (doStatusOutput) - DumpStatus(st + i2s(dumpCount++) + "-G", status, NDim); - - std::vector dist1, dist2, dist3, dist4; - CompDistances(A, newCpt, 3, dist1, dist2, dist3, dist4); - - // Make sure that there are no CPOINTs in dist1, dist2, dist3. - int numDist1 = 0; - for (size_t k = 0; k < dist1.size(); k++) { - LO j = dist1[k]; - if (status[j] < CPOINT) - dist1[numDist1++] = j; - } - dist1.resize(numDist1); - int numDist2 = 0; - for (size_t k = 0; k < dist2.size(); k++) { - LO j = dist2[k]; - if (status[j] < CPOINT) - dist2[numDist2++] = j; - } - dist2.resize(numDist2); - int numDist3 = 0; - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; - if (status[j] < CPOINT) - dist3[numDist3++] = j; - } - dist3.resize(numDist3); - - // Update cumGraphDist - // NOTE: order matters as dist2 is contained within dist3, etc. 
- for (size_t k = 0; k < dist3.size(); k++) distIncrement[dist3[k]] = 3; - for (size_t k = 0; k < dist2.size(); k++) distIncrement[dist2[k]] = 2; - for (size_t k = 0; k < dist1.size(); k++) distIncrement[dist1[k]] = 1; - distIncrement[newCpt] = 0; - - // Update myCpts() to reflect dependence of neighbors on newCpt - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; - - TEUCHOS_TEST_FOR_EXCEPTION(numCpts[j] >= myCpts.getNnzPerRow(), Exceptions::RuntimeError, - "Increase max number of C points per row"); - myCpts(j)[numCpts[j]++] = newCpt; - } - - for (size_t k = 0; k < dist3.size(); k++) { - LO j = dist3[k]; - // (numCpts[j]-1) is to match Matlab, where numCpts is updated after distance calculation - cumGraphDist[j] = (cumGraphDist[j]*(numCpts[j]-1) + distIncrement[j])/numCpts[j]; - } - cumGraphDist[newCpt] = 0; + norm = zero; + for (int k = 0; k < NDim; k++) { + vec2[k] = coords1D[k][j] - coords1D[k][myCpts(j)[1]]; + norm += vec2[k] * vec2[k]; } + norm = sqrt(norm); + for (int k = 0; k < NDim; k++) + vec2[k] /= norm; + + orientation[p] = zero; + for (int k = 0; k < NDim; k++) + orientation[p] += vec1[k] * vec2[k]; + + score[p] = -(orientWeight * orientation[p] - (1 - orientWeight) * score[p]); + + } else { + orientation[p] = 1.0; } } - } - // Build up the CPOINT list - for (i = 0; i < numRows; i++) - if (status[i] == CPOINT) - Cptlist.push_back(i); - else if (status[i] == CPOINT_U) - status[i] = CPOINT; - } + std::vector index(numCandidates); + for (size_t p = 0; p < numCandidates; p++) + index[p] = p; + Muelu_az_dsort2(score, index); + for (size_t p = 0; p < numCandidates; p++) { + int newCpt = candidates[index[p]]; - // Look at pattern rows which have only one or two nonzeros and consider - // adding additional nonzero entries. New nonzero possibilities for row k are - // obtained by looking at k's neighbors (as determined by the matrix) to see - // what CPOINTs these neighbors interpolate from. 
The final determination is - // based on a composite score that considers the distance between k and the - // possible new CPOINT as well as the orientation of the new possible CPOINT - // with respect to k's current CPOINTs. Generally, points which are on the - // opposite side of k' current CPOINTs are favored. - template - void Q2Q1uPFactory:: - PhaseTwoPattern(const Matrix& A, const MultiVector& coords, const std::vector& status, MyCptList& myCpts) const { - GetOStream(Runtime0) << "Starting phase 2" << std::endl; + if (numCpts[newCpt] == numCDepends && + cumGraphDist[newCpt] >= 2.6 && + orientation[index[p]] > -0.2) { + status[newCpt] = CPOINT; + numCpts[newCpt] = 1; + myCpts(newCpt)[0] = newCpt; - int NDim = coords.getNumVectors(); - size_t numRows = A.getLocalNumRows(); + if (doStatusOutput) + DumpStatus(st + i2s(dumpCount++) + "-G", status, NDim); - ArrayRCP ia; - ArrayRCP ja; - CreateCrsPointers(A, ia, ja); + std::vector dist1, dist2, dist3, dist4; + CompDistances(A, newCpt, 3, dist1, dist2, dist3, dist4); - ArrayRCP > coords1D(NDim); - for (int k = 0; k < NDim; k++) - coords1D[k] = coords.getData(k); - - std::vector& numCpts = myCpts.getNumCpts(); - - typedef Teuchos::ScalarTraits STS; - SC zero = STS::zero(); - - size_t N = myCpts.getCList().size(); - std::vector nearbyCs(N); - std::vector score (N); - std::vector dists (N); - - std::vector scratch (numRows, 'n'); - std::vector candidates(numRows); - - for (int numCDepends = 1; numCDepends <= 2; numCDepends++) { - int numCandidates = 0; - for (size_t i = 0; i < numRows; i++) - if (numCpts[i] == numCDepends && status[i] < CPOINT) - candidates[numCandidates++] = i; - - for (int p = 0; p < numCandidates; p++) { - // Mark already existing CPOINT dependencies - LO i = candidates[p]; - LO* cpts = myCpts(i); - for (int k = 0; k < numCpts[i]; k++) - scratch[cpts[k]] = 'y'; - - // Make a list of my neighbors' CPOINT dependencies, excluding all - // already existing CPOINT dependencies for candidates[p] - const 
LO* neighs = &ja[ia[i]]; - int numNeighbors = ia[i+1] - ia[i]; - int numNearbyCs = 0; - for (int k = 0; k < numNeighbors; k++) { - LO curNeigh = neighs[k]; - const LO* neighCs = myCpts(curNeigh); - - for (int j = 0; j < numCpts[curNeigh]; j++) { - LO neighNeighC = neighCs[j]; - - if (scratch[neighNeighC] != 'y') { - scratch[neighNeighC] = 'y'; - nearbyCs[numNearbyCs++] = neighNeighC; - } + // Make sure that there are no CPOINTs in dist1, dist2, dist3. + int numDist1 = 0; + for (size_t k = 0; k < dist1.size(); k++) { + LO j = dist1[k]; + if (status[j] < CPOINT) + dist1[numDist1++] = j; } + dist1.resize(numDist1); + int numDist2 = 0; + for (size_t k = 0; k < dist2.size(); k++) { + LO j = dist2[k]; + if (status[j] < CPOINT) + dist2[numDist2++] = j; + } + dist2.resize(numDist2); + int numDist3 = 0; + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; + if (status[j] < CPOINT) + dist3[numDist3++] = j; + } + dist3.resize(numDist3); + + // Update cumGraphDist + // NOTE: order matters as dist2 is contained within dist3, etc. 
+ for (size_t k = 0; k < dist3.size(); k++) distIncrement[dist3[k]] = 3; + for (size_t k = 0; k < dist2.size(); k++) distIncrement[dist2[k]] = 2; + for (size_t k = 0; k < dist1.size(); k++) distIncrement[dist1[k]] = 1; + distIncrement[newCpt] = 0; + + // Update myCpts() to reflect dependence of neighbors on newCpt + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; + + TEUCHOS_TEST_FOR_EXCEPTION(numCpts[j] >= myCpts.getNnzPerRow(), Exceptions::RuntimeError, + "Increase max number of C points per row"); + myCpts(j)[numCpts[j]++] = newCpt; + } + + for (size_t k = 0; k < dist3.size(); k++) { + LO j = dist3[k]; + // (numCpts[j]-1) is to match Matlab, where numCpts is updated after distance calculation + cumGraphDist[j] = (cumGraphDist[j] * (numCpts[j] - 1) + distIncrement[j]) / numCpts[j]; + } + cumGraphDist[newCpt] = 0; } + } + } + } - // Reset scratch - for (int k = 0; k < numCpts[i]; k++) - scratch[cpts[k]] = 'n'; - for (int k = 0; k < numNearbyCs; k++) - scratch[nearbyCs[k]] = 'n'; + // Build up the CPOINT list + for (i = 0; i < numRows; i++) + if (status[i] == CPOINT) + Cptlist.push_back(i); + else if (status[i] == CPOINT_U) + status[i] = CPOINT; +} + +// Look at pattern rows which have only one or two nonzeros and consider +// adding additional nonzero entries. New nonzero possibilities for row k are +// obtained by looking at k's neighbors (as determined by the matrix) to see +// what CPOINTs these neighbors interpolate from. The final determination is +// based on a composite score that considers the distance between k and the +// possible new CPOINT as well as the orientation of the new possible CPOINT +// with respect to k's current CPOINTs. Generally, points which are on the +// opposite side of k' current CPOINTs are favored. 
+template +void Q2Q1uPFactory:: + PhaseTwoPattern(const Matrix& A, const MultiVector& coords, const std::vector& status, MyCptList& myCpts) const { + GetOStream(Runtime0) << "Starting phase 2" << std::endl; + + int NDim = coords.getNumVectors(); + size_t numRows = A.getLocalNumRows(); + + ArrayRCP ia; + ArrayRCP ja; + CreateCrsPointers(A, ia, ja); + + ArrayRCP > coords1D(NDim); + for (int k = 0; k < NDim; k++) + coords1D[k] = coords.getData(k); + + std::vector& numCpts = myCpts.getNumCpts(); + + typedef Teuchos::ScalarTraits STS; + SC zero = STS::zero(); + + size_t N = myCpts.getCList().size(); + std::vector nearbyCs(N); + std::vector score(N); + std::vector dists(N); + + std::vector scratch(numRows, 'n'); + std::vector candidates(numRows); + + for (int numCDepends = 1; numCDepends <= 2; numCDepends++) { + int numCandidates = 0; + for (size_t i = 0; i < numRows; i++) + if (numCpts[i] == numCDepends && status[i] < CPOINT) + candidates[numCandidates++] = i; + + for (int p = 0; p < numCandidates; p++) { + // Mark already existing CPOINT dependencies + LO i = candidates[p]; + LO* cpts = myCpts(i); + for (int k = 0; k < numCpts[i]; k++) + scratch[cpts[k]] = 'y'; + + // Make a list of my neighbors' CPOINT dependencies, excluding all + // already existing CPOINT dependencies for candidates[p] + const LO* neighs = &ja[ia[i]]; + int numNeighbors = ia[i + 1] - ia[i]; + int numNearbyCs = 0; + for (int k = 0; k < numNeighbors; k++) { + LO curNeigh = neighs[k]; + const LO* neighCs = myCpts(curNeigh); + + for (int j = 0; j < numCpts[curNeigh]; j++) { + LO neighNeighC = neighCs[j]; + + if (scratch[neighNeighC] != 'y') { + scratch[neighNeighC] = 'y'; + nearbyCs[numNearbyCs++] = neighNeighC; + } + } + } + + // Reset scratch + for (int k = 0; k < numCpts[i]; k++) + scratch[cpts[k]] = 'n'; + for (int k = 0; k < numNearbyCs; k++) + scratch[nearbyCs[k]] = 'n'; - // MATCH_MATLB - std::sort(nearbyCs.begin(), nearbyCs.begin() + numNearbyCs); + // MATCH_MATLB + std::sort(nearbyCs.begin(), 
nearbyCs.begin() + numNearbyCs); - if (numNearbyCs != 0) { - SC norm = zero, vec1[3] = {0.0, 0.0, 0.0}, vec2[3] = {0.0, 0.0, 0.0}; + if (numNearbyCs != 0) { + SC norm = zero, vec1[3] = {0.0, 0.0, 0.0}, vec2[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < NDim; k++) { + vec1[k] = coords1D[k][i] - coords1D[k][cpts[0]]; + norm += vec1[k] * vec1[k]; + } + norm = sqrt(norm); + for (int k = 0; k < NDim; k++) + vec1[k] /= norm; + + if (numCDepends == 2) { + norm = zero; for (int k = 0; k < NDim; k++) { - vec1[k] = coords1D[k][i] - coords1D[k][cpts[0]]; - norm += vec1[k]*vec1[k]; + vec2[k] = coords1D[k][i] - coords1D[k][cpts[1]]; + norm += vec2[k] * vec2[k]; } norm = sqrt(norm); for (int k = 0; k < NDim; k++) - vec1[k] /= norm; + vec2[k] /= norm; - if (numCDepends == 2) { - norm = zero; - for (int k = 0; k < NDim; k++) { - vec2[k] = coords1D[k][i] - coords1D[k][cpts[1]]; - norm += vec2[k]*vec2[k]; - } - norm = sqrt(norm); - for (int k = 0; k < NDim; k++) - vec2[k] /= norm; - - } else { - for (int k = 0; k < NDim; k++) - vec2[k] = vec1[k]; - } + } else { + for (int k = 0; k < NDim; k++) + vec2[k] = vec1[k]; + } - for (int k = 0; k < numNearbyCs; k++) score[k] = 0; - for (int k = 0; k < numNearbyCs; k++) dists[k] = 0; + for (int k = 0; k < numNearbyCs; k++) score[k] = 0; + for (int k = 0; k < numNearbyCs; k++) dists[k] = 0; - for (int j = 0; j < numNearbyCs; j++) { - SC newVec[3]; + for (int j = 0; j < numNearbyCs; j++) { + SC newVec[3]; - norm = 0; - for (int k = 0; k < NDim; k++) { - newVec[k] = coords1D[k][nearbyCs[j]] - coords1D[k][i]; - norm += newVec[k]*newVec[k]; - } - norm = sqrt(norm); - for (int k = 0; k < NDim; k++) - newVec[k] /= norm; + norm = 0; + for (int k = 0; k < NDim; k++) { + newVec[k] = coords1D[k][nearbyCs[j]] - coords1D[k][i]; + norm += newVec[k] * newVec[k]; + } + norm = sqrt(norm); + for (int k = 0; k < NDim; k++) + newVec[k] /= norm; - score[j] = 0; - for (int k = 0; k < NDim; k++) - score[j] += newVec[k]*(vec1[k] + vec2[k]); - // Why?? 
- score[j] /= 2; + score[j] = 0; + for (int k = 0; k < NDim; k++) + score[j] += newVec[k] * (vec1[k] + vec2[k]); + // Why?? + score[j] /= 2; - dists[j] = norm; - } + dists[j] = norm; + } - // Normalize distances - double maxDist = 0.; - for (int j = 0; j < numNearbyCs; j++) - if (maxDist < dists[j]) - maxDist = dists[j]; - - for (int j = 0; j < numNearbyCs; j++) - dists[j] /= maxDist; - - const double distWeight = 0.3; - double maxComposite = -10000; - int maxIndex = -1; - for (int j = 0; j < numNearbyCs; j++) { - // The formula is - // if (score[j] - distWeight*dists[j] > maxComposite) - // MATCH_MATLAB - double composite = score[j] - distWeight*dists[j] + 1.0e-7*(nearbyCs[j]-1); - if (maxComposite < composite) { - maxComposite = composite; - maxIndex = j; - } + // Normalize distances + double maxDist = 0.; + for (int j = 0; j < numNearbyCs; j++) + if (maxDist < dists[j]) + maxDist = dists[j]; + + for (int j = 0; j < numNearbyCs; j++) + dists[j] /= maxDist; + + const double distWeight = 0.3; + double maxComposite = -10000; + int maxIndex = -1; + for (int j = 0; j < numNearbyCs; j++) { + // The formula is + // if (score[j] - distWeight*dists[j] > maxComposite) + // MATCH_MATLAB + double composite = score[j] - distWeight * dists[j] + 1.0e-7 * (nearbyCs[j] - 1); + if (maxComposite < composite) { + maxComposite = composite; + maxIndex = j; } + } - if (score[maxIndex] - 0.2*numCDepends > -0.3) { - TEUCHOS_TEST_FOR_EXCEPTION(numCpts[i] >= myCpts.getNnzPerRow(), - Exceptions::RuntimeError, "Increase max number of C points per row"); - myCpts(i)[numCpts[i]++] = nearbyCs[maxIndex]; - } + if (score[maxIndex] - 0.2 * numCDepends > -0.3) { + TEUCHOS_TEST_FOR_EXCEPTION(numCpts[i] >= myCpts.getNnzPerRow(), + Exceptions::RuntimeError, "Increase max number of C points per row"); + myCpts(i)[numCpts[i]++] = nearbyCs[maxIndex]; } } } } +} - // Compute mid-points associated with adjacent Cpts. 
- // - // Basically, the location of each Fpoint is compared with the average - // location of each Cpoint in Pat(Fpoint,:). If the location of the Fpoint is - // close to this average it is considered as a possible mid-point. In - // addition, however, we look to see if a possible mid-point is "close" or - // not to an already computed mid-point. If it is NOT too close, then this - // possible mid-point is declared to be an actual mid-point. - template - void Q2Q1uPFactory:: - FindMidPoints(const Matrix& A, const MultiVector& coords, Array& Cptlist, const MyCptList& myCpts) const { - int NDim = coords.getNumVectors(); - size_t numRows = A.getLocalNumRows(); - - const ParameterList& pL = GetParameterList(); - double tau_2 = pL.get("tau_2"); - // In calculations, we use tau_2^2 - tau_2 = tau_2 * tau_2; - - const std::vector& numCpts = myCpts.getNumCpts(); - - ArrayRCP ia; - ArrayRCP ja; - CreateCrsPointers(A, ia, ja); - - ArrayRCP > coords1D(NDim); - for (int k = 0; k < NDim; k++) - coords1D[k] = coords.getData(k); - - typedef Teuchos::ScalarTraits STS; - SC zero = STS::zero(); +// Compute mid-points associated with adjacent Cpts. +// +// Basically, the location of each Fpoint is compared with the average +// location of each Cpoint in Pat(Fpoint,:). If the location of the Fpoint is +// close to this average it is considered as a possible mid-point. In +// addition, however, we look to see if a possible mid-point is "close" or +// not to an already computed mid-point. If it is NOT too close, then this +// possible mid-point is declared to be an actual mid-point. 
+template +void Q2Q1uPFactory:: + FindMidPoints(const Matrix& A, const MultiVector& coords, Array& Cptlist, const MyCptList& myCpts) const { + int NDim = coords.getNumVectors(); + size_t numRows = A.getLocalNumRows(); + + const ParameterList& pL = GetParameterList(); + double tau_2 = pL.get("tau_2"); + // In calculations, we use tau_2^2 + tau_2 = tau_2 * tau_2; + + const std::vector& numCpts = myCpts.getNumCpts(); + + ArrayRCP ia; + ArrayRCP ja; + CreateCrsPointers(A, ia, ja); + + ArrayRCP > coords1D(NDim); + for (int k = 0; k < NDim; k++) + coords1D[k] = coords.getData(k); + + typedef Teuchos::ScalarTraits STS; + SC zero = STS::zero(); + + // Calculate number of nonzeros per row, make it negative, and then sort. + // The idea is that when assigning midpoints, we want to start by looking + // at points which have many coarse point dependencies + std::vector nnzPerRow(numRows); + std::vector index(numRows); + for (size_t i = 0; i < numRows; i++) { + nnzPerRow[i] = (numCpts[i] ? numCpts[i] : 1); + nnzPerRow[i] = -100000 * numCpts[i] + i; + index[i] = i; + } - // Calculate number of nonzeros per row, make it negative, and then sort. - // The idea is that when assigning midpoints, we want to start by looking - // at points which have many coarse point dependencies - std::vector nnzPerRow(numRows); - std::vector index (numRows); - for (size_t i = 0; i < numRows; i++) { - nnzPerRow[i] = (numCpts[i] ? numCpts[i] : 1); - nnzPerRow[i] = -100000*numCpts[i] + i; - index [i] = i; - } + // Sort only for the purposes of filling 'index', which determines the + // order that we search for possible midpoints + Muelu_az_sort(&nnzPerRow[0], numRows, &index[0], NULL); + + // Reset so that we have unsorted version of nnzPerRow and also mark points + // which cannot be mid points + std::vector lookedAt(numRows, 'n'); + for (size_t i = 0; i < numRows; i++) { + nnzPerRow[i] = (numCpts[i] ? 
numCpts[i] : 1); + if (nnzPerRow[i] == 1) + lookedAt[i] = 'y'; + } + for (int i = 0; i < Cptlist.size(); i++) + lookedAt[Cptlist[i]] = 'y'; - // Sort only for the purposes of filling 'index', which determines the - // order that we search for possible midpoints - Muelu_az_sort(&nnzPerRow[0], numRows, &index[0], NULL); + // Compute some target midpoints based on taking averages associated with + // the sparsity pattern and coarse grid point locations. + ArrayRCP > targetMidCoords1D(NDim); + for (int k = 0; k < NDim; k++) { + ArrayRCP& target1D = targetMidCoords1D[k]; - // Reset so that we have unsorted version of nnzPerRow and also mark points - // which cannot be mid points - std::vector lookedAt(numRows, 'n'); + target1D.resize(numRows); for (size_t i = 0; i < numRows; i++) { - nnzPerRow[i] = (numCpts[i] ? numCpts[i] : 1); - if (nnzPerRow[i] == 1) - lookedAt[i] = 'y'; - } - for (int i = 0; i < Cptlist.size(); i++) - lookedAt[Cptlist[i]] = 'y'; - - // Compute some target midpoints based on taking averages associated with - // the sparsity pattern and coarse grid point locations. 
- ArrayRCP > targetMidCoords1D(NDim); - for (int k = 0; k < NDim; k++) { - ArrayRCP& target1D = targetMidCoords1D[k]; + target1D[i] = zero; - target1D.resize(numRows); - for (size_t i = 0; i < numRows; i++) { - target1D[i] = zero; - - for (int j = 0; j < numCpts[i]; j++) - target1D[i] += coords1D[k][myCpts(i)[j]]; + for (int j = 0; j < numCpts[i]; j++) + target1D[i] += coords1D[k][myCpts(i)[j]]; - target1D[i] /= nnzPerRow[i]; - } + target1D[i] /= nnzPerRow[i]; } + } - std::vector isMidPoint(numRows, 'n'); - std::vector inNearbyCs(numRows, 'n'); - std::vector inNeighs (numRows, 'n'); - std::vector neighs(numRows); - std::vector sameCGroup(50); + std::vector isMidPoint(numRows, 'n'); + std::vector inNearbyCs(numRows, 'n'); + std::vector inNeighs(numRows, 'n'); + std::vector neighs(numRows); + std::vector sameCGroup(50); - int numMidPoints = 0; - for (size_t i = 0; i < numRows; i++) { - int curF = index[i]; + int numMidPoints = 0; + for (size_t i = 0; i < numRows; i++) { + int curF = index[i]; - if (lookedAt[curF] == 'y') - continue; - lookedAt[curF] = 'y'; + if (lookedAt[curF] == 'y') + continue; + lookedAt[curF] = 'y'; - const LO* curFCs = myCpts(curF); + const LO* curFCs = myCpts(curF); - for (int j = 0; j < numCpts[curF]; j++) - inNearbyCs[curFCs[j]] = 'y'; + for (int j = 0; j < numCpts[curF]; j++) + inNearbyCs[curFCs[j]] = 'y'; - // Find all FPOINTs with the same curFCs (perhaps - // containing additional curFCs) as curF and - // put them in sameCGroup - int numNeigh = 0; - neighs[numNeigh++] = curF; - inNeighs[curF] = 'y'; + // Find all FPOINTs with the same curFCs (perhaps + // containing additional curFCs) as curF and + // put them in sameCGroup + int numNeigh = 0; + neighs[numNeigh++] = curF; + inNeighs[curF] = 'y'; - int nextLayerStart = 0; - int nextLayerEnd = 0; - int numSameGrp = 0; + int nextLayerStart = 0; + int nextLayerEnd = 0; + int numSameGrp = 0; - int flag = 1; - while (flag == 1) { - flag = 0; - - for (int k = nextLayerStart; k <= nextLayerEnd; 
k++) { - LO curNeigh = neighs[k]; - const LO* neighCs = myCpts(curNeigh); + int flag = 1; + while (flag == 1) { + flag = 0; - // Check if subset of this neighbor's CPOINT dependencies include all - // the CPOINT dependencies of curF - int sum = 0; - for (int j = 0; j < numCpts[curNeigh]; j++) - if (inNearbyCs[neighCs[j]] == 'y') - sum++; + for (int k = nextLayerStart; k <= nextLayerEnd; k++) { + LO curNeigh = neighs[k]; + const LO* neighCs = myCpts(curNeigh); - if (sum == nnzPerRow[curF]) { - lookedAt[curNeigh] = 'y'; + // Check if subset of this neighbor's CPOINT dependencies include all + // the CPOINT dependencies of curF + int sum = 0; + for (int j = 0; j < numCpts[curNeigh]; j++) + if (inNearbyCs[neighCs[j]] == 'y') + sum++; - // Make sure we have enough space - if (Teuchos::as(sameCGroup.size()) <= numSameGrp) - sameCGroup.resize(2*numSameGrp); + if (sum == nnzPerRow[curF]) { + lookedAt[curNeigh] = 'y'; - sameCGroup[numSameGrp++] = curNeigh; - flag = 1; - } + // Make sure we have enough space + if (Teuchos::as(sameCGroup.size()) <= numSameGrp) + sameCGroup.resize(2 * numSameGrp); - // Add neighbors of curNeigh that haven't already been - // add to the neighbor list while processing curF - for (size_t j = ia[curNeigh]; j < ia[curNeigh+1]; j++) - if (inNeighs[ja[j]] == 'n') { - neighs[numNeigh++] = ja[j]; - inNeighs[ja[j]] = 'y'; - } + sameCGroup[numSameGrp++] = curNeigh; + flag = 1; } - nextLayerStart = nextLayerEnd + 1; - nextLayerEnd = numNeigh - 1; + // Add neighbors of curNeigh that haven't already been + // add to the neighbor list while processing curF + for (size_t j = ia[curNeigh]; j < ia[curNeigh + 1]; j++) + if (inNeighs[ja[j]] == 'n') { + neighs[numNeigh++] = ja[j]; + inNeighs[ja[j]] = 'y'; + } } - // Reset status arrays - for (int j = 0; j < numNeigh; j++) - inNeighs[neighs[j]] = 'n'; - for (int j = 0; j < numCpts[curF]; j++) - inNearbyCs[curFCs[j]] = 'n'; - - // At this point we have now constructed a group of possible mid points - // all with the 
same Cpt dependencies. Now, we need to find the one in - // this group which is closest to the target midpoint coordinates. - double smallest = 1e30; - int smallestIndex = -1; - for (int j = 0; j < numSameGrp; j++) { - // MATCH_MATLAB - double dist = 1e-8*(sameCGroup[j]+1); + nextLayerStart = nextLayerEnd + 1; + nextLayerEnd = numNeigh - 1; + } - for (int k = 0; k < NDim; k++) { - double dtemp = coords1D[k][sameCGroup[j]] - targetMidCoords1D[k][curF]; - dist += dtemp*dtemp; - } - if (dist < smallest) { - smallest = dist; - smallestIndex = sameCGroup[j]; - } + // Reset status arrays + for (int j = 0; j < numNeigh; j++) + inNeighs[neighs[j]] = 'n'; + for (int j = 0; j < numCpts[curF]; j++) + inNearbyCs[curFCs[j]] = 'n'; + + // At this point we have now constructed a group of possible mid points + // all with the same Cpt dependencies. Now, we need to find the one in + // this group which is closest to the target midpoint coordinates. + double smallest = 1e30; + int smallestIndex = -1; + for (int j = 0; j < numSameGrp; j++) { + // MATCH_MATLAB + double dist = 1e-8 * (sameCGroup[j] + 1); + + for (int k = 0; k < NDim; k++) { + double dtemp = coords1D[k][sameCGroup[j]] - targetMidCoords1D[k][curF]; + dist += dtemp * dtemp; } + if (dist < smallest) { + smallest = dist; + smallestIndex = sameCGroup[j]; + } + } - // So now smallestIndex is the best midpoint candidate within sameCGroup. - // We now need to check if smallestIndex is really close to an already - // existing mid-point. In fact, we could have multiple copies of - // mid-points or some very close mid-points. To see this, consider - // - // P1 P2 - // - // - // - // P3 P4 - // where P1/P4's midpoint is the same as P2/P3's midpoint. We get rid of - // these by checking if a new potential midpoint is close to any previous - // midpoints. - - // Check if anybody in sameCGroup is already a midpoint. If so, check - // each one of these midpoints to see if any of these is real close to - // the curF. 
- flag = 0; - for (int j = 0; j < numSameGrp; j++) - if (isMidPoint[sameCGroup[j]] == 'y') - flag = 1; - - if (flag == 1) { - // Get an idea of the spacing between curFCs - double delta = 0.0; - for (int k = 0; k < NDim; k++) { - double dmin = coords1D[k][curFCs[0]]; - double dmax = dmin; - - for (int j = 1; j < numCpts[curF]; j++) { - SC c = coords1D[k][curFCs[j]]; - if (c < dmin) dmin = c; - if (c > dmax) dmax = c; - } - delta += ((dmax-dmin)*(dmax-dmin)); - } + // So now smallestIndex is the best midpoint candidate within sameCGroup. + // We now need to check if smallestIndex is really close to an already + // existing mid-point. In fact, we could have multiple copies of + // mid-points or some very close mid-points. To see this, consider + // + // P1 P2 + // + // + // + // P3 P4 + // where P1/P4's midpoint is the same as P2/P3's midpoint. We get rid of + // these by checking if a new potential midpoint is close to any previous + // midpoints. + + // Check if anybody in sameCGroup is already a midpoint. If so, check + // each one of these midpoints to see if any of these is real close to + // the curF. + flag = 0; + for (int j = 0; j < numSameGrp; j++) + if (isMidPoint[sameCGroup[j]] == 'y') + flag = 1; - // Now find the closest point among all sameCGroup midPoints to - // smallestIndex. If this point is not too close, we make smallestIndex - // a new mid-point. 
- double close = 1000000.; - for (int j = 0; j < numSameGrp; j++) { - int t = sameCGroup[j]; - if (isMidPoint[t] == 'y') { - double current = distance2(coords1D, smallestIndex, t); - if (current < close) - close = current; - } + if (flag == 1) { + // Get an idea of the spacing between curFCs + double delta = 0.0; + for (int k = 0; k < NDim; k++) { + double dmin = coords1D[k][curFCs[0]]; + double dmax = dmin; + + for (int j = 1; j < numCpts[curF]; j++) { + SC c = coords1D[k][curFCs[j]]; + if (c < dmin) dmin = c; + if (c > dmax) dmax = c; } + delta += ((dmax - dmin) * (dmax - dmin)); + } - if (close/delta > tau_2) { - isMidPoint[smallestIndex] = 'y'; - numMidPoints++; + // Now find the closest point among all sameCGroup midPoints to + // smallestIndex. If this point is not too close, we make smallestIndex + // a new mid-point. + double close = 1000000.; + for (int j = 0; j < numSameGrp; j++) { + int t = sameCGroup[j]; + if (isMidPoint[t] == 'y') { + double current = distance2(coords1D, smallestIndex, t); + if (current < close) + close = current; } + } - } else { + if (close / delta > tau_2) { isMidPoint[smallestIndex] = 'y'; numMidPoints++; } + + } else { + isMidPoint[smallestIndex] = 'y'; + numMidPoints++; } + } - // This loop also sorts mid points - int count = 0; - for (size_t i = 0; i < numRows; i++) - if (isMidPoint[i] == 'y') { - Cptlist.push_back(i); - count++; - } + // This loop also sorts mid points + int count = 0; + for (size_t i = 0; i < numRows; i++) + if (isMidPoint[i] == 'y') { + Cptlist.push_back(i); + count++; + } - TEUCHOS_TEST_FOR_EXCEPTION(count != numMidPoints, Exceptions::RuntimeError, - "Wrong with the number of mid points: " << count << " vs. " << numMidPoints); - } + TEUCHOS_TEST_FOR_EXCEPTION(count != numMidPoints, Exceptions::RuntimeError, + "Wrong with the number of mid points: " << count << " vs. 
" << numMidPoints); +} - // Convert information in Cptlist, myCpts into a sparsity pattern matrix - template - void Q2Q1uPFactory:: - CptDepends2Pattern(const Matrix& A, const MyCptList& myCpts, RCP& P, LocalOrdinal offset) const { - RCP rowMap = A.getRowMap(); - size_t numRows = myCpts.getLocalNumRows(); +// Convert information in Cptlist, myCpts into a sparsity pattern matrix +template +void Q2Q1uPFactory:: + CptDepends2Pattern(const Matrix& A, const MyCptList& myCpts, RCP& P, LocalOrdinal offset) const { + RCP rowMap = A.getRowMap(); + size_t numRows = myCpts.getLocalNumRows(); - // FIXME: how does offset play here? - const Array& Cptlist = myCpts.getCList(); - RCP coarseMap = MapFactory::Build(rowMap->lib(), Cptlist.size(), rowMap->getIndexBase() + offset, rowMap->getComm()); + // FIXME: how does offset play here? + const Array& Cptlist = myCpts.getCList(); + RCP coarseMap = MapFactory::Build(rowMap->lib(), Cptlist.size(), rowMap->getIndexBase() + offset, rowMap->getComm()); - P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0)); - RCP Pcrs = rcp_dynamic_cast(P)->getCrsMatrix(); + P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0)); + RCP Pcrs = rcp_dynamic_cast(P)->getCrsMatrix(); - ArrayRCP iaP; - ArrayRCP jaP; - ArrayRCP valP; + ArrayRCP iaP; + ArrayRCP jaP; + ArrayRCP valP; - const std::vector& numCpts = myCpts.getNumCpts(); - size_t nnzEstimate = std::accumulate(numCpts.begin(), numCpts.end(), 0); + const std::vector& numCpts = myCpts.getNumCpts(); + size_t nnzEstimate = std::accumulate(numCpts.begin(), numCpts.end(), 0); - Pcrs->allocateAllValues(nnzEstimate, iaP, jaP, valP); + Pcrs->allocateAllValues(nnzEstimate, iaP, jaP, valP); - ArrayView ia = iaP(); - ArrayView ja = jaP(); - ArrayView val = valP(); + ArrayView ia = iaP(); + ArrayView ja = jaP(); + ArrayView val = valP(); - std::vector coarseCmap(numRows, -1); - for (int i = 0; i < Cptlist.size(); i++) - coarseCmap[Cptlist[i]] = i; + std::vector coarseCmap(numRows, -1); + for (int i = 0; i < Cptlist.size(); 
i++) + coarseCmap[Cptlist[i]] = i; - SC one = Teuchos::ScalarTraits::one(); + SC one = Teuchos::ScalarTraits::one(); - // Build up the prolongator sparsity pattern and the initial - // guess used for Emin (which must have row sums equal to one) + // Build up the prolongator sparsity pattern and the initial + // guess used for Emin (which must have row sums equal to one) - ia[0] = 0; - size_t nnzCount = 0; - for (size_t i = 0; i < numRows; i++) { - const LO* cpts = myCpts(i); - - for (int j = 0; j < numCpts[i]; j++) { - ja [nnzCount] = coarseCmap[cpts[j]]; - TEUCHOS_TEST_FOR_EXCEPTION(ja[nnzCount] == -1, Exceptions::RuntimeError, - "Point " << cpts[j] << " is not in the global list of cpoints, but it is in the list for " << i); - val[nnzCount] = one/((SC) numCpts[i]); - nnzCount++; - } - // NOTE: we could theoretically sort here - // Do we need to, though? - ia[i+1] = nnzCount; - } + ia[0] = 0; + size_t nnzCount = 0; + for (size_t i = 0; i < numRows; i++) { + const LO* cpts = myCpts(i); - if (rowMap->lib() == Xpetra::UseTpetra) { - // - Cannot resize for Epetra, as it checks for same pointers - // - Need to resize for Tpetra, as it check ().size() == ia[numRows] - // NOTE: these invalidate ja and val views - jaP .resize(nnzCount); - valP.resize(nnzCount); + for (int j = 0; j < numCpts[i]; j++) { + ja[nnzCount] = coarseCmap[cpts[j]]; + TEUCHOS_TEST_FOR_EXCEPTION(ja[nnzCount] == -1, Exceptions::RuntimeError, + "Point " << cpts[j] << " is not in the global list of cpoints, but it is in the list for " << i); + val[nnzCount] = one / ((SC)numCpts[i]); + nnzCount++; } + // NOTE: we could theoretically sort here + // Do we need to, though? 
+ ia[i + 1] = nnzCount; + } - Pcrs->setAllValues(iaP, jaP, valP); - Pcrs->expertStaticFillComplete(coarseMap, A.getDomainMap()); + if (rowMap->lib() == Xpetra::UseTpetra) { + // - Cannot resize for Epetra, as it checks for same pointers + // - Need to resize for Tpetra, as it check ().size() == ia[numRows] + // NOTE: these invalidate ja and val views + jaP.resize(nnzCount); + valP.resize(nnzCount); } - // Compute all points which are within a distance 1-4 from StartPt - template - void Q2Q1uPFactory:: - CompDistances(const Matrix& A, LO start, int numDist, std::vector& dist1, std::vector& dist2, std::vector& dist3, std::vector& dist4) const { - TEUCHOS_TEST_FOR_EXCEPTION(numDist < 1 || numDist > 4, Exceptions::InvalidArgument, "CompDistances() cannot compute " << numDist << " distances"); + Pcrs->setAllValues(iaP, jaP, valP); + Pcrs->expertStaticFillComplete(coarseMap, A.getDomainMap()); +} - size_t numRows = A.getGlobalNumRows(); +// Compute all points which are within a distance 1-4 from StartPt +template +void Q2Q1uPFactory:: + CompDistances(const Matrix& A, LO start, int numDist, std::vector& dist1, std::vector& dist2, std::vector& dist3, std::vector& dist4) const { + TEUCHOS_TEST_FOR_EXCEPTION(numDist < 1 || numDist > 4, Exceptions::InvalidArgument, "CompDistances() cannot compute " << numDist << " distances"); - ArrayRCP ia; - ArrayRCP ja; - CreateCrsPointers(A, ia, ja); + size_t numRows = A.getGlobalNumRows(); - std::vector added(numRows, 'n'); - std::vector neighs; - neighs.reserve(100); + ArrayRCP ia; + ArrayRCP ja; + CreateCrsPointers(A, ia, ja); - neighs.push_back(start); - added[start] = 'y'; + std::vector added(numRows, 'n'); + std::vector neighs; + neighs.reserve(100); - for (int k = 1; k <= numDist; k++) { + neighs.push_back(start); + added[start] = 'y'; - int numNeighs = neighs.size(); - for (int i = 0; i < numNeighs; i++) - for (size_t j = ia[neighs[i]]; j < ia[neighs[i]+1]; j++) - if (added[ja[j]] == 'n') { - added[ja[j]] = 'y'; - 
neighs.push_back(ja[j]); - } + for (int k = 1; k <= numDist; k++) { + int numNeighs = neighs.size(); + for (int i = 0; i < numNeighs; i++) + for (size_t j = ia[neighs[i]]; j < ia[neighs[i] + 1]; j++) + if (added[ja[j]] == 'n') { + added[ja[j]] = 'y'; + neighs.push_back(ja[j]); + } - if (k == 1) dist1 = neighs; - if (k == 2) dist2 = neighs; - if (k == 3) dist3 = neighs; - if (k == 4) dist4 = neighs; - } + if (k == 1) dist1 = neighs; + if (k == 2) dist2 = neighs; + if (k == 3) dist3 = neighs; + if (k == 4) dist4 = neighs; } +} - template - void Q2Q1uPFactory:: - CreateCrsPointers(const Matrix& A, ArrayRCP& ia, ArrayRCP& ja) const { - RCP Awrap = rcp_dynamic_cast(rcpFromRef(A)); - TEUCHOS_TEST_FOR_EXCEPTION(Awrap.is_null(), Exceptions::RuntimeError, "A is not of CrsMatrixWrap type"); +template +void Q2Q1uPFactory:: + CreateCrsPointers(const Matrix& A, ArrayRCP& ia, ArrayRCP& ja) const { + RCP Awrap = rcp_dynamic_cast(rcpFromRef(A)); + TEUCHOS_TEST_FOR_EXCEPTION(Awrap.is_null(), Exceptions::RuntimeError, "A is not of CrsMatrixWrap type"); - ArrayRCP val; - Awrap->getCrsMatrix()->getAllValues(ia, ja, val); - } + ArrayRCP val; + Awrap->getCrsMatrix()->getAllValues(ia, ja, val); +} - const std::string OUTPUT_DIR = "status/"; +const std::string OUTPUT_DIR = "status/"; - template - void Q2Q1uPFactory:: - DumpStatus(const std::string& filename, const std::vector& status, int NDim, bool isAmalgamated) const { - const std::string dirName = OUTPUT_DIR; +template +void Q2Q1uPFactory:: + DumpStatus(const std::string& filename, const std::vector& status, int NDim, bool isAmalgamated) const { + const std::string dirName = OUTPUT_DIR; - struct stat sb; - TEUCHOS_TEST_FOR_EXCEPTION(stat(dirName.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode), Exceptions::RuntimeError, - "Please create a \"" << dirName << "\" directory"); + struct stat sb; + TEUCHOS_TEST_FOR_EXCEPTION(stat(dirName.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode), Exceptions::RuntimeError, + "Please create a \"" << dirName << "\" 
directory"); - std::ofstream ofs((dirName + filename).c_str()); - size_t step = (isAmalgamated ? 1 : NDim); - for (size_t i = 0; i < status.size(); i += step) - ofs << status[i] << std::endl; - } + std::ofstream ofs((dirName + filename).c_str()); + size_t step = (isAmalgamated ? 1 : NDim); + for (size_t i = 0; i < status.size(); i += step) + ofs << status[i] << std::endl; +} - template - void Q2Q1uPFactory:: - DumpCoords(const std::string& filename, const MultiVector& coords) const { - const std::string dirName = OUTPUT_DIR; +template +void Q2Q1uPFactory:: + DumpCoords(const std::string& filename, const MultiVector& coords) const { + const std::string dirName = OUTPUT_DIR; - struct stat sb; - if (stat(dirName.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)) - GetOStream(Errors) << "Please create a \"" << dirName << "\" directory" << std::endl; + struct stat sb; + if (stat(dirName.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)) + GetOStream(Errors) << "Please create a \"" << dirName << "\" directory" << std::endl; - const int NDim = coords.getNumVectors(); - const int n = coords.getLocalLength(); + const int NDim = coords.getNumVectors(); + const int n = coords.getLocalLength(); - ArrayRCP > coords1D(NDim); - for (int k = 0; k < NDim; k++) - coords1D[k] = coords.getData(k); + ArrayRCP > coords1D(NDim); + for (int k = 0; k < NDim; k++) + coords1D[k] = coords.getData(k); - std::ofstream ofs((dirName + filename).c_str()); - for (int i = 0; i < n; i++) { - for (int k = 0; k < NDim; k++) - ofs << " " << coords1D[k][i]; - ofs << std::endl; - } + std::ofstream ofs((dirName + filename).c_str()); + for (int i = 0; i < n; i++) { + for (int k = 0; k < NDim; k++) + ofs << " " << coords1D[k][i]; + ofs << std::endl; } +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_Q2Q1UPFACTORY_DECL_HPP +#endif // MUELU_Q2Q1UPFACTORY_DECL_HPP diff --git a/packages/muelu/research/q2q1/Q2Q1.cpp b/packages/muelu/research/q2q1/Q2Q1.cpp index 8608c62f6906..ef80b7dff5a6 100644 --- 
a/packages/muelu/research/q2q1/Q2Q1.cpp +++ b/packages/muelu/research/q2q1/Q2Q1.cpp @@ -101,30 +101,31 @@ // FIXME #include "MueLu_UseDefaultTypes.hpp" -template -Teuchos::RCP > +template +Teuchos::RCP > ReadBinary(const std::string& fileName, const Teuchos::RCP >& comm) { - typedef Scalar SC; - typedef LocalOrdinal LO; + typedef Scalar SC; + typedef LocalOrdinal LO; typedef GlobalOrdinal GO; - typedef Node NO; + typedef Node NO; TEUCHOS_TEST_FOR_EXCEPTION(comm->getSize() != 1, MueLu::Exceptions::RuntimeError, "Serial read only"); std::ifstream ifs(fileName.c_str(), std::ios::binary); TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), MueLu::Exceptions::RuntimeError, "Can not read \"" << fileName << "\""); int m, n, nnz; - ifs.read(reinterpret_cast(&m), sizeof(m)); - ifs.read(reinterpret_cast(&n), sizeof(n)); + ifs.read(reinterpret_cast(&m), sizeof(m)); + ifs.read(reinterpret_cast(&n), sizeof(n)); ifs.read(reinterpret_cast(&nnz), sizeof(nnz)); - typedef Tpetra::Map tMap; - typedef Tpetra::CrsMatrix tCrsMatrix; + typedef Tpetra::Map tMap; + typedef Tpetra::CrsMatrix tCrsMatrix; - GO indexBase = 0; - Teuchos::RCP rowMap = rcp(new tMap(m, indexBase, comm)), rangeMap = rowMap; - Teuchos::RCP colMap = rcp(new tMap(n, indexBase, comm)), domainMap = colMap;; - Teuchos::RCP A = rcp(new tCrsMatrix(rowMap, colMap, 9)); + GO indexBase = 0; + Teuchos::RCP rowMap = rcp(new tMap(m, indexBase, comm)), rangeMap = rowMap; + Teuchos::RCP colMap = rcp(new tMap(n, indexBase, comm)), domainMap = colMap; + ; + Teuchos::RCP A = rcp(new tCrsMatrix(rowMap, colMap, 9)); TEUCHOS_TEST_FOR_EXCEPTION(sizeof(int) != sizeof(GO), MueLu::Exceptions::RuntimeError, "Incompatible sizes"); @@ -132,7 +133,7 @@ ReadBinary(const std::string& fileName, const Teuchos::RCP vals; for (int i = 0; i < m; i++) { int row, rownnz; - ifs.read(reinterpret_cast(&row), sizeof(row)); + ifs.read(reinterpret_cast(&row), sizeof(row)); ifs.read(reinterpret_cast(&rownnz), sizeof(rownnz)); inds.resize(rownnz); vals.resize(rownnz); @@ 
-154,20 +155,20 @@ ReadBinary(const std::string& fileName, const Teuchos::RCP - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::ParameterList; using Teuchos::Array; using Teuchos::ArrayRCP; - using Teuchos::rcp_dynamic_cast; - using Teuchos::null; using Teuchos::as; + using Teuchos::null; + using Teuchos::ParameterList; + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcp_dynamic_cast; using Teuchos::TimeMonitor; - using Tpetra::MatrixMarket::Reader; using Thyra::tpetraVectorSpace; + using Tpetra::MatrixMarket::Reader; // ========================================================================= // MPI initialization using Teuchos @@ -190,36 +191,47 @@ int main(int argc, char *argv[]) { Xpetra::Parameters xpetraParameters(clp); // configure problem - std::string prefix = "./Q2Q1_9x9_"; clp.setOption("prefix", &prefix, "prefix for data files"); - std::string rhs = ""; clp.setOption("rhs", &rhs, "rhs"); + std::string prefix = "./Q2Q1_9x9_"; + clp.setOption("prefix", &prefix, "prefix for data files"); + std::string rhs = ""; + clp.setOption("rhs", &rhs, "rhs"); // configure run - std::string xmlFileName = "driver.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'driver.xml']"); - double tol = 1e-8; clp.setOption("tol", &tol, "solver convergence tolerance"); - std::string type = "unstructured"; clp.setOption("type", &type, "structured/unstructured"); - int use9ptPatA = 1; clp.setOption("use9pt", &use9ptPatA, "use 9-point stencil matrix for velocity prolongator construction"); - int useFilters = 1; clp.setOption("usefilters", &useFilters, "use filters on A and BB^T"); - - double tau_1 = 0.06; clp.setOption("tau_1", &tau_1, "tau_1 parameter from paper (for matrix filtering)"); - double tau_2 = sqrt(0.0015); clp.setOption("tau_2", &tau_2, "tau_2 parameter from paper (for mid point detection)"); - - int binary = 0; clp.setOption("binary", &binary, "read matrix in binary format"); + std::string xmlFileName = 
"driver.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'driver.xml']"); + double tol = 1e-8; + clp.setOption("tol", &tol, "solver convergence tolerance"); + std::string type = "unstructured"; + clp.setOption("type", &type, "structured/unstructured"); + int use9ptPatA = 1; + clp.setOption("use9pt", &use9ptPatA, "use 9-point stencil matrix for velocity prolongator construction"); + int useFilters = 1; + clp.setOption("usefilters", &useFilters, "use filters on A and BB^T"); + + double tau_1 = 0.06; + clp.setOption("tau_1", &tau_1, "tau_1 parameter from paper (for matrix filtering)"); + double tau_2 = sqrt(0.0015); + clp.setOption("tau_2", &tau_2, "tau_2 parameter from paper (for mid point detection)"); + + int binary = 0; + clp.setOption("binary", &binary, "read matrix in binary format"); // configure misc - int printTimings = 0; clp.setOption("timings", &printTimings, "print timings to screen"); + int printTimings = 0; + clp.setOption("timings", &printTimings, "print timings to screen"); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } - typedef Tpetra::CrsMatrix tCrsMatrix; - typedef Tpetra::Operator tOperator; - typedef Tpetra::MultiVector tMultiVector; - typedef Tpetra::Map tMap; - typedef Thyra::TpetraVectorSpace THTP_Vs; + typedef Tpetra::CrsMatrix tCrsMatrix; + typedef Tpetra::Operator tOperator; + typedef Tpetra::MultiVector tMultiVector; + typedef Tpetra::Map tMap; + typedef Thyra::TpetraVectorSpace THTP_Vs; // Read data from files RCP A11, A119Pt, A21, A12; @@ -230,36 +242,37 @@ int main(int argc, char *argv[]) { try { if 
(!binary) { filename = prefix + "A.mm"; - A11 = Reader::readSparseFile(filename.c_str(), comm); - A119Pt = A11; + A11 = Reader::readSparseFile(filename.c_str(), comm); + A119Pt = A11; if (use9ptPatA) { filename = prefix + "AForPat.mm"; - A119Pt = Reader::readSparseFile(filename.c_str(), comm); + A119Pt = Reader::readSparseFile(filename.c_str(), comm); } } else { filename = prefix + "A.dat"; - A11 = ReadBinary(filename.c_str(), comm); - A119Pt = A11; + A11 = ReadBinary(filename.c_str(), comm); + A119Pt = A11; if (use9ptPatA) { filename = prefix + "AForPat.dat"; - A119Pt = ReadBinary(filename.c_str(), comm); + A119Pt = ReadBinary(filename.c_str(), comm); } } filename = prefix + "B.mm"; - A21 = Reader::readSparseFile(filename.c_str(), comm); + A21 = Reader::readSparseFile(filename.c_str(), comm); filename = prefix + "Bt.mm"; - A12 = Reader::readSparseFile(filename.c_str(), comm); + A12 = Reader::readSparseFile(filename.c_str(), comm); RCP cmap1 = A11->getDomainMap(), cmap2 = A12->getDomainMap(); filename = prefix + "VelCoords.mm"; - Vcoords = Reader::readDenseFile(filename.c_str(), comm, cmap1); + Vcoords = Reader::readDenseFile(filename.c_str(), comm, cmap1); filename = prefix + "PresCoords.mm"; - Pcoords = Reader::readDenseFile(filename.c_str(), comm, cmap2); + Pcoords = Reader::readDenseFile(filename.c_str(), comm, cmap2); // For now, we assume that p2v maps local pressure DOF to a local x-velocity DOF - filename = prefix + "p2vMap.mm"; - ArrayRCP slop = Xpetra::IO::ReadMultiVector(filename.c_str(), - Xpetra::toXpetra(A21->getRangeMap()))->getData(0); + filename = prefix + "p2vMap.mm"; + ArrayRCP slop = Xpetra::IO::ReadMultiVector(filename.c_str(), + Xpetra::toXpetra(A21->getRangeMap())) + ->getData(0); p2vMap.resize(slop.size()); for (int i = 0; i < slop.size(); i++) p2vMap[i] = as(slop[i]); @@ -272,14 +285,14 @@ int main(int argc, char *argv[]) { RCP domain12 = tpetraVectorSpace(A12->getDomainMap()); RCP domain21 = tpetraVectorSpace(A21->getDomainMap()); - RCP 
range11 = tpetraVectorSpace(A11->getRangeMap()); - RCP range12 = tpetraVectorSpace(A12->getRangeMap()); - RCP range21 = tpetraVectorSpace(A21->getRangeMap()); + RCP range11 = tpetraVectorSpace(A11->getRangeMap()); + RCP range12 = tpetraVectorSpace(A12->getRangeMap()); + RCP range21 = tpetraVectorSpace(A21->getRangeMap()); - Teko::LinearOp thA11 = Thyra::tpetraLinearOp(range11, domain11, A11); - Teko::LinearOp thA12 = Thyra::tpetraLinearOp(range12, domain12, A12); - Teko::LinearOp thA21 = Thyra::tpetraLinearOp(range21, domain21, A21); - Teko::LinearOp thA11_9Pt = Thyra::tpetraLinearOp(range11, domain11, A119Pt); + Teko::LinearOp thA11 = Thyra::tpetraLinearOp(range11, domain11, A11); + Teko::LinearOp thA12 = Thyra::tpetraLinearOp(range12, domain12, A12); + Teko::LinearOp thA21 = Thyra::tpetraLinearOp(range21, domain21, A21); + Teko::LinearOp thA11_9Pt = Thyra::tpetraLinearOp(range11, domain11, A119Pt); // Bang together the parameter list. Right now, all the MueLu details is // hardwired in the MueLu-TpetraQ2Q1 classes. We probably want to switch @@ -287,65 +300,65 @@ int main(int argc, char *argv[]) { // via parameter lists. RCP stratimikosList = rcp(new ParameterList); - stratimikosList->set("Linear Solver Type", "Belos"); + stratimikosList->set("Linear Solver Type", "Belos"); stratimikosList->set("Preconditioner Type", "MueLu-TpetraQ2Q1"); ParameterList& BelosList = stratimikosList->sublist("Linear Solver Types").sublist("Belos"); - BelosList.set("Solver Type", "Block GMRES"); // FIXME: should it be "Pseudo Block GMRES"? - BelosList.sublist("VerboseObject").set("Verbosity Level", "low"); // this is needed, as otherwise Stratimikos ignores Belos output + BelosList.set("Solver Type", "Block GMRES"); // FIXME: should it be "Pseudo Block GMRES"? 
+ BelosList.sublist("VerboseObject").set("Verbosity Level", "low"); // this is needed, as otherwise Stratimikos ignores Belos output ParameterList& GmresDetails = BelosList.sublist("Solver Types").sublist("Block GMRES"); - GmresDetails.set("Maximum Iterations", 100); - GmresDetails.set("Convergence Tolerance", tol); - GmresDetails.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - GmresDetails.set("Output Frequency", 1); - GmresDetails.set("Output Style", Belos::Brief); + GmresDetails.set("Maximum Iterations", 100); + GmresDetails.set("Convergence Tolerance", tol); + GmresDetails.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + GmresDetails.set("Output Frequency", 1); + GmresDetails.set("Output Style", Belos::Brief); ParameterList& Q2Q1List = stratimikosList->sublist("Preconditioner Types").sublist("MueLu-TpetraQ2Q1"); - Q2Q1List.set("useFilters", useFilters); - Q2Q1List.set("tau_1", tau_1); - Q2Q1List.set("tau_2", tau_2); - Q2Q1List.set("Velcoords", Vcoords); - Q2Q1List.set("Prescoords", Pcoords); - Q2Q1List.set("p2vMap", p2vMap); - Q2Q1List.set("A11", thA11); - Q2Q1List.set("A12", thA12); - Q2Q1List.set("A21", thA21); - Q2Q1List.set("A11_9Pt", thA11_9Pt); + Q2Q1List.set("useFilters", useFilters); + Q2Q1List.set("tau_1", tau_1); + Q2Q1List.set("tau_2", tau_2); + Q2Q1List.set("Velcoords", Vcoords); + Q2Q1List.set("Prescoords", Pcoords); + Q2Q1List.set("p2vMap", p2vMap); + Q2Q1List.set("A11", thA11); + Q2Q1List.set("A12", thA12); + Q2Q1List.set("A21", thA21); + Q2Q1List.set("A11_9Pt", thA11_9Pt); Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(&Q2Q1List), *comm); std::cout << "Input parameters: " << *stratimikosList << std::endl; // Stratimikos vodou - typedef Thyra::PreconditionerFactoryBase Base; - typedef Thyra::LinearOpWithSolveFactoryBase LOWSFB; - typedef Thyra::LinearOpWithSolveBase LOWSB; - typedef Thyra::MultiVectorBase TH_Mvb; + typedef Thyra::PreconditionerFactoryBase 
Base; + typedef Thyra::LinearOpWithSolveFactoryBase LOWSFB; + typedef Thyra::LinearOpWithSolveBase LOWSB; + typedef Thyra::MultiVectorBase TH_Mvb; Stratimikos::LinearSolverBuilder linearSolverBuilder; - //Thyra::addMueLuToStratimikosBuilder(linearSolverBuilder); + // Thyra::addMueLuToStratimikosBuilder(linearSolverBuilder); - Stratimikos::enableMueLu(linearSolverBuilder); // Epetra - Stratimikos::enableMueLuTpetraQ2Q1(linearSolverBuilder, "MueLu-TpetraQ2Q1"); // Tpetra + Stratimikos::enableMueLu(linearSolverBuilder); // Epetra + Stratimikos::enableMueLuTpetraQ2Q1(linearSolverBuilder, "MueLu-TpetraQ2Q1"); // Tpetra linearSolverBuilder.setParameterList(stratimikosList); RCP lowsFactory = Thyra::createLinearSolveStrategy(linearSolverBuilder); - RCP nsA = lowsFactory->createOp(); + RCP nsA = lowsFactory->createOp(); // I've hacked together a big matrix that does not use strided maps by // simply reading the data again. Normally, this would be supplied by Eric // Cyr and would be Teko operators. - int numElem = A12->getRangeMap()->getLocalNumElements() + A21->getRangeMap()->getLocalNumElements(); + int numElem = A12->getRangeMap()->getLocalNumElements() + A21->getRangeMap()->getLocalNumElements(); RCP fullMap = Utilities::Map2TpetraMap(*(MapFactory::createUniformContigMap(Xpetra::UseTpetra, numElem, comm))); RCP A; if (!binary) A = Reader::readSparseFile((prefix + "BigA.mm").c_str(), fullMap, fullMap, fullMap, fullMap, true, true, false); else - A = ReadBinary((prefix + "BigA.dat").c_str(), comm); + A = ReadBinary((prefix + "BigA.dat").c_str(), comm); const RCP > thA = Thyra::createLinearOp(A); Thyra::initializeOp(*lowsFactory, thA, nsA.ptr()); @@ -366,8 +379,8 @@ int main(int argc, char *argv[]) { // Set the initial guess Dirichlet points to the proper value. 
// This step is pretty important as the preconditioner may return zero at Dirichlet points ArrayRCP dirBCs = Utilities::DetectDirichletRows(*MueLu::TpetraCrs_To_XpetraMatrix(rcp_dynamic_cast(A))); - ArrayRCP tXdata = tX->getDataNonConst(0); - ArrayRCP tBdata = tB->getData(0); + ArrayRCP tXdata = tX->getDataNonConst(0); + ArrayRCP tBdata = tB->getData(0); for (LO i = 0; i < tXdata.size(); i++) if (dirBCs[i]) tXdata[i] = tBdata[i]; @@ -389,5 +402,5 @@ int main(int argc, char *argv[]) { } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp index f760776a9f38..56cad37696e0 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp @@ -73,17 +73,16 @@ #include "SetupRegionMatrix_def.hpp" #include "SetupRegionSmoothers_def.hpp" - #if defined(HAVE_MUELU_AMESOS2) #include #include #endif -using Teuchos::RCP; -using Teuchos::ArrayRCP; using Teuchos::Array; +using Teuchos::ArrayRCP; using Teuchos::ArrayView; using Teuchos::ParameterList; +using Teuchos::RCP; /*! \brief Create coarse level maps with continuous GIDs * @@ -95,12 +94,11 @@ using Teuchos::ParameterList; * to deal with them in particular. 
*/ template -void createContinuousCoarseLevelMaps(const RCP > rowMap, ///< row map - const RCP > colMap, ///< column map - RCP >& contRowMap, ///< row map with continuous GIDs - RCP >& contColMap ///< column map with continuous GIDs - ) -{ +void createContinuousCoarseLevelMaps(const RCP> rowMap, ///< row map + const RCP> colMap, ///< column map + RCP>& contRowMap, ///< row map with continuous GIDs + RCP>& contColMap ///< column map with continuous GIDs +) { #include "Xpetra_UseShortNamesOrdinal.hpp" // /!\ This function is pure ordinal, no scalar type is passed as input // This means that use only three template paramters and that we @@ -114,8 +112,7 @@ void createContinuousCoarseLevelMaps(const RCPgetComm()); return; -} // createContinuousCoarseLevelMaps - +} // createContinuousCoarseLevelMaps /* Reconstruct coarse-level maps (assuming fully structured grids) * @@ -133,8 +130,7 @@ void createContinuousCoarseLevelMaps(const RCP void MakeCoarseLevelMaps(const int maxRegPerGID, - Teuchos::RCP > regHierarchy) { - + Teuchos::RCP> regHierarchy) { #include "Xpetra_UseShortNames.hpp" #include "MueLu_UseShortNames.hpp" @@ -142,39 +138,39 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, RCP level0 = regHierarchy->GetLevel(0); - const GO GO_INV = Teuchos::OrdinalTraits::invalid(); - const int numLevels = regHierarchy->GetNumLevels(); + const GO GO_INV = Teuchos::OrdinalTraits::invalid(); + const int numLevels = regHierarchy->GetNumLevels(); // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // Teuchos::FancyOStream& out = *fancy; const int myRank = level0->GetComm()->getRank(); Teuchos::Array coarseCompositeToRegionLIDs; - Teuchos::ArrayView compositeToRegionLIDs = level0->Get > ("compositeToRegionLIDs"); + Teuchos::ArrayView compositeToRegionLIDs = level0->Get>("compositeToRegionLIDs"); - for(int currentLevel = 1; currentLevel < numLevels; ++currentLevel) { - RCP level = regHierarchy->GetLevel(currentLevel); - RCP regProlong = level->Get >("P"); - RCP 
regRowMap = regProlong->getColMap(); + for (int currentLevel = 1; currentLevel < numLevels; ++currentLevel) { + RCP level = regHierarchy->GetLevel(currentLevel); + RCP regProlong = level->Get>("P"); + RCP regRowMap = regProlong->getColMap(); - RCP levelFine = regHierarchy->GetLevel(currentLevel-1); - RCP regRowImportFine = levelFine->Get >("rowImport"); - RCP regMatFine = levelFine->Get >("A"); - RCP regRowMapFine = regMatFine->getRowMap(); + RCP levelFine = regHierarchy->GetLevel(currentLevel - 1); + RCP regRowImportFine = levelFine->Get>("rowImport"); + RCP regMatFine = levelFine->Get>("A"); + RCP regRowMapFine = regMatFine->getRowMap(); // Extracting some basic information about local mesh in composite/region format const size_t numFineRegionNodes = regProlong->getLocalNumRows(); const size_t numFineCompositeNodes = compositeToRegionLIDs.size(); const size_t numFineDuplicateNodes = numFineRegionNodes - numFineCompositeNodes; - const size_t numCoarseRegionNodes = regProlong->getColMap()->getLocalNumElements(); + const size_t numCoarseRegionNodes = regProlong->getColMap()->getLocalNumElements(); // Find the regionLIDs associated with local duplicated nodes // This will allow us to later loop only on duplicated nodes size_t countComposites = 0, countDuplicates = 0; Array fineDuplicateLIDs(numFineDuplicateNodes); - for(size_t regionIdx = 0; regionIdx < numFineRegionNodes; ++regionIdx) { - if(compositeToRegionLIDs[countComposites] == static_cast(regionIdx)) { + for (size_t regionIdx = 0; regionIdx < numFineRegionNodes; ++regionIdx) { + if (compositeToRegionLIDs[countComposites] == static_cast(regionIdx)) { ++countComposites; } else { fineDuplicateLIDs[countDuplicates] = regionIdx; @@ -183,17 +179,16 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, } // We gather the coarse GIDs associated with each fine point in the local composite mesh part. 
- RCP > coarseCompositeGIDs - = Xpetra::VectorFactory::Build(regRowImportFine->getSourceMap(), false); - Teuchos::ArrayRCP coarseCompositeGIDsData = coarseCompositeGIDs->getDataNonConst(0); + RCP> coarseCompositeGIDs = Xpetra::VectorFactory::Build(regRowImportFine->getSourceMap(), false); + Teuchos::ArrayRCP coarseCompositeGIDsData = coarseCompositeGIDs->getDataNonConst(0); - for(size_t compositeNodeIdx = 0; compositeNodeIdx < numFineCompositeNodes; ++compositeNodeIdx) { - ArrayView coarseRegionLID; // Should contain a single value - ArrayView dummyData; // Should contain a single value + for (size_t compositeNodeIdx = 0; compositeNodeIdx < numFineCompositeNodes; ++compositeNodeIdx) { + ArrayView coarseRegionLID; // Should contain a single value + ArrayView dummyData; // Should contain a single value regProlong->getLocalRowView(compositeToRegionLIDs[compositeNodeIdx], - coarseRegionLID, - dummyData); - if(coarseRegionLID.size() == 1) { + coarseRegionLID, + dummyData); + if (coarseRegionLID.size() == 1) { coarseCompositeGIDsData[compositeNodeIdx] = regProlong->getColMap()->getGlobalElement(coarseRegionLID[0]); } else { coarseCompositeGIDsData[compositeNodeIdx] = -1; @@ -202,42 +197,42 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, // We communicate the above GIDs to their duplicate so that we can replace GIDs of the region // column map and form the quasiRegion column map. 
- RCP > coarseQuasiregionGIDs; - RCP > coarseRegionGIDs; + RCP> coarseQuasiregionGIDs; + RCP> coarseRegionGIDs; compositeToRegional(coarseCompositeGIDs, coarseQuasiregionGIDs, coarseRegionGIDs, regRowMapFine, regRowImportFine); - RCP > regionsPerGIDWithGhosts = - Xpetra::MultiVectorFactory::Build(regRowMapFine, - maxRegPerGID, - false); - RCP> interfaceGIDs = Xpetra::MultiVectorFactory::Build(regRowMapFine, - maxRegPerGID, - false); - - Array > regionsPerGIDWithGhostsFine(maxRegPerGID); - Array > regionsPerGIDWithGhostsCoarse(maxRegPerGID); - Array > interfaceGIDsCoarse(maxRegPerGID); - for(size_t idx = 0; idx < static_cast(maxRegPerGID); ++idx) { - regionsPerGIDWithGhostsFine[idx] = levelFine->Get > >("regionsPerGIDWithGhosts")->getData(idx); + RCP> regionsPerGIDWithGhosts = + Xpetra::MultiVectorFactory::Build(regRowMapFine, + maxRegPerGID, + false); + RCP> interfaceGIDs = Xpetra::MultiVectorFactory::Build(regRowMapFine, + maxRegPerGID, + false); + + Array> regionsPerGIDWithGhostsFine(maxRegPerGID); + Array> regionsPerGIDWithGhostsCoarse(maxRegPerGID); + Array> interfaceGIDsCoarse(maxRegPerGID); + for (size_t idx = 0; idx < static_cast(maxRegPerGID); ++idx) { + regionsPerGIDWithGhostsFine[idx] = levelFine->Get>>("regionsPerGIDWithGhosts")->getData(idx); regionsPerGIDWithGhostsCoarse[idx] = regionsPerGIDWithGhosts->getDataNonConst(idx); - interfaceGIDsCoarse[idx] = interfaceGIDs->getDataNonConst(idx); - for(size_t coarseIdx = 0; - coarseIdx < regionsPerGIDWithGhosts->getLocalLength(); ++coarseIdx) { + interfaceGIDsCoarse[idx] = interfaceGIDs->getDataNonConst(idx); + for (size_t coarseIdx = 0; + coarseIdx < regionsPerGIDWithGhosts->getLocalLength(); ++coarseIdx) { regionsPerGIDWithGhostsCoarse[idx][coarseIdx] = -1; - interfaceGIDsCoarse[idx][coarseIdx] = 0; + interfaceGIDsCoarse[idx][coarseIdx] = 0; } } - for(size_t fineIdx = 0; fineIdx < numFineRegionNodes; ++fineIdx) { - ArrayView coarseRegionLID; // Should contain a single value - ArrayView dummyData; // Should 
contain a single value + for (size_t fineIdx = 0; fineIdx < numFineRegionNodes; ++fineIdx) { + ArrayView coarseRegionLID; // Should contain a single value + ArrayView dummyData; // Should contain a single value regProlong->getLocalRowView(fineIdx, - coarseRegionLID, - dummyData); + coarseRegionLID, + dummyData); const LO coarseIdx = coarseRegionLID[0]; // Now fill regionPerGIDWithGhostsCoarse[:][coarseRegionLID] @@ -251,15 +246,19 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, // For now let us assume that maxRegPerGID is constant and hope for the best. LO countFinePIDs = 0; LO countCoarsePIDs = 0; - for(LO idx = 0; idx < maxRegPerGID; ++idx) { - if(-1 < regionsPerGIDWithGhostsFine[idx][fineIdx]) {++countFinePIDs;} - if(-1 < regionsPerGIDWithGhostsCoarse[idx][coarseIdx]) {++countCoarsePIDs;} + for (LO idx = 0; idx < maxRegPerGID; ++idx) { + if (-1 < regionsPerGIDWithGhostsFine[idx][fineIdx]) { + ++countFinePIDs; + } + if (-1 < regionsPerGIDWithGhostsCoarse[idx][coarseIdx]) { + ++countCoarsePIDs; + } } - if(countCoarsePIDs < countFinePIDs) { - for(LO idx = 0; idx < countFinePIDs; ++idx) { + if (countCoarsePIDs < countFinePIDs) { + for (LO idx = 0; idx < countFinePIDs; ++idx) { regionsPerGIDWithGhostsCoarse[idx][coarseIdx] = regionsPerGIDWithGhostsFine[idx][fineIdx]; - if(regionsPerGIDWithGhostsCoarse[idx][coarseIdx] == myRank) { + if (regionsPerGIDWithGhostsCoarse[idx][coarseIdx] == myRank) { interfaceGIDsCoarse[idx][coarseIdx] = regRowMap->getGlobalElement(coarseIdx); } } @@ -268,12 +267,12 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, Array fineRegionDuplicateCoarseLIDs(numFineDuplicateNodes); Array fineRegionDuplicateCoarseGIDs(numFineDuplicateNodes); - for(size_t duplicateIdx = 0; duplicateIdx < numFineDuplicateNodes; ++duplicateIdx) { - ArrayView coarseRegionLID; // Should contain a single value - ArrayView dummyData; // Should contain a single value + for (size_t duplicateIdx = 0; duplicateIdx < numFineDuplicateNodes; ++duplicateIdx) { + ArrayView 
coarseRegionLID; // Should contain a single value + ArrayView dummyData; // Should contain a single value regProlong->getLocalRowView(fineDuplicateLIDs[duplicateIdx], - coarseRegionLID, - dummyData); + coarseRegionLID, + dummyData); fineRegionDuplicateCoarseLIDs[duplicateIdx] = regProlong->getColMap()->getGlobalElement(coarseRegionLID[0]); fineRegionDuplicateCoarseGIDs[duplicateIdx] = (coarseQuasiregionGIDs->getDataNonConst(0))[fineDuplicateLIDs[duplicateIdx]]; } @@ -283,17 +282,17 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, coarseCompositeToRegionLIDs.resize(numCoarseRegionNodes); Array coarseQuasiregRowMapData = regProlong->getColMap()->getLocalElementList(); Array coarseCompRowMapData(numCoarseRegionNodes, -1); - for(size_t regionIdx = 0; regionIdx < numCoarseRegionNodes; ++regionIdx) { + for (size_t regionIdx = 0; regionIdx < numCoarseRegionNodes; ++regionIdx) { const GO initialValue = coarseQuasiregRowMapData[regionIdx]; - for(size_t duplicateIdx = 0; duplicateIdx < numFineDuplicateNodes; ++duplicateIdx) { - if((initialValue == fineRegionDuplicateCoarseLIDs[duplicateIdx]) && - (fineRegionDuplicateCoarseGIDs[duplicateIdx] < coarseQuasiregRowMapData[regionIdx]) && - (-1 < fineRegionDuplicateCoarseGIDs[duplicateIdx])){ + for (size_t duplicateIdx = 0; duplicateIdx < numFineDuplicateNodes; ++duplicateIdx) { + if ((initialValue == fineRegionDuplicateCoarseLIDs[duplicateIdx]) && + (fineRegionDuplicateCoarseGIDs[duplicateIdx] < coarseQuasiregRowMapData[regionIdx]) && + (-1 < fineRegionDuplicateCoarseGIDs[duplicateIdx])) { coarseQuasiregRowMapData[regionIdx] = fineRegionDuplicateCoarseGIDs[duplicateIdx]; } } - if(initialValue == coarseQuasiregRowMapData[regionIdx]) { - coarseCompRowMapData[countCoarseComposites] = coarseQuasiregRowMapData[regionIdx]; + if (initialValue == coarseQuasiregRowMapData[regionIdx]) { + coarseCompRowMapData[countCoarseComposites] = coarseQuasiregRowMapData[regionIdx]; coarseCompositeToRegionLIDs[countCoarseComposites] = regionIdx; 
++countCoarseComposites; } @@ -302,59 +301,57 @@ void MakeCoarseLevelMaps(const int maxRegPerGID, coarseCompositeToRegionLIDs.resize(countCoarseComposites); // We are now ready to fill up the outputs - RCP regRowMapCurrent = regProlong->getColMap(); + RCP regRowMapCurrent = regProlong->getColMap(); - RCP quasiRegRowMap = MapFactory::Build(regProlong->getColMap()->lib(), - GO_INV, - coarseQuasiregRowMapData(), - regProlong->getColMap()->getIndexBase(), - regProlong->getColMap()->getComm()); + RCP quasiRegRowMap = MapFactory::Build(regProlong->getColMap()->lib(), + GO_INV, + coarseQuasiregRowMapData(), + regProlong->getColMap()->getIndexBase(), + regProlong->getColMap()->getComm()); - RCP compRowMap = MapFactory::Build(regProlong->getColMap()->lib(), - GO_INV, - coarseCompRowMapData(), - regProlong->getColMap()->getIndexBase(), - regProlong->getColMap()->getComm()); + RCP compRowMap = MapFactory::Build(regProlong->getColMap()->lib(), + GO_INV, + coarseCompRowMapData(), + regProlong->getColMap()->getIndexBase(), + regProlong->getColMap()->getComm()); RCP regRowImportCurrent = ImportFactory::Build(compRowMap, quasiRegRowMap); // Now generate matvec data Teuchos::ArrayRCP regionMatVecLIDs; - Teuchos::RCP > regionInterfaceImporter; + Teuchos::RCP> regionInterfaceImporter; SetupMatVec(interfaceGIDs, regionsPerGIDWithGhosts, regRowMapCurrent, regRowImportCurrent, regionMatVecLIDs, regionInterfaceImporter); // Fill level with the outputs - level->Set >("regionMatVecLIDs",regionMatVecLIDs); - level->Set > >("regionInterfaceImporter", regionInterfaceImporter); - level->Set > >("interfaceGIDs", interfaceGIDs); - level->Set > >("regionsPerGIDWithGhosts", regionsPerGIDWithGhosts); - level->Set > >("rowImport",regRowImportCurrent); + level->Set>("regionMatVecLIDs", regionMatVecLIDs); + level->Set>>("regionInterfaceImporter", regionInterfaceImporter); + level->Set>>("interfaceGIDs", interfaceGIDs); + level->Set>>("regionsPerGIDWithGhosts", regionsPerGIDWithGhosts); + 
level->Set>>("rowImport", regRowImportCurrent); // Finally reset compositeToRegionLIDs compositeToRegionLIDs = coarseCompositeToRegionLIDs(); - } // Loop over numLevels -} // MakeCoarseLevelMaps - + } // Loop over numLevels +} // MakeCoarseLevelMaps // Form the composite coarse level operator -template -void MakeCoarseCompositeOperator(RCP >& compRowMap, - RCP >& quasiRegRowMap, - RCP >& quasiRegColMap, - RCP >& regRowImporter, - RCP >& regMatrix, - RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& regCoarseCoordinates, - RCP >& coarseCompOp, - RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& compCoarseCoordinates, - const bool makeCompCoords) -{ +template +void MakeCoarseCompositeOperator(RCP>& compRowMap, + RCP>& quasiRegRowMap, + RCP>& quasiRegColMap, + RCP>& regRowImporter, + RCP>& regMatrix, + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node>>& regCoarseCoordinates, + RCP>& coarseCompOp, + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node>>& compCoarseCoordinates, + const bool makeCompCoords) { #include "Xpetra_UseShortNames.hpp" using CoordType = typename Teuchos::ScalarTraits::coordinateType; - coarseCompOp = MatrixFactory::Build(compRowMap, - // This estimate is very conservative and probably costs us lots of memory... - 8*regMatrix->getCrsGraph()->getLocalMaxNumRowEntries()); + coarseCompOp = MatrixFactory::Build(compRowMap, + // This estimate is very conservative and probably costs us lots of memory... 
+ 8 * regMatrix->getCrsGraph()->getLocalMaxNumRowEntries()); regionalToComposite(regMatrix, quasiRegRowMap, quasiRegColMap, @@ -366,21 +363,21 @@ void MakeCoarseCompositeOperator(RCPsetObjectLabel("coarse composite operator"); // Create coarse composite coordinates for repartitioning - if(makeCompCoords) { - const int check = regMatrix->getRowMap()->getLocalNumElements() % regCoarseCoordinates->getMap()->getLocalNumElements(); + if (makeCompCoords) { + const int check = regMatrix->getRowMap()->getLocalNumElements() % regCoarseCoordinates->getMap()->getLocalNumElements(); TEUCHOS_ASSERT(check == 0); RCP compCoordMap; - RCP > regCoordImporter; - if(dofsPerNode == 1) { - compCoordMap = compRowMap; + RCP> regCoordImporter; + if (dofsPerNode == 1) { + compCoordMap = compRowMap; regCoordImporter = regRowImporter; } else { using size_type = typename Teuchos::Array::size_type; Array compCoordMapData(compRowMap->getLocalNumElements() / dofsPerNode); ArrayView compRowMapData = compRowMap->getLocalElementList(); - for(size_type nodeIdx = 0; nodeIdx < compCoordMapData.size(); ++nodeIdx) { - compCoordMapData[nodeIdx] = compRowMapData[nodeIdx*dofsPerNode] / dofsPerNode; + for (size_type nodeIdx = 0; nodeIdx < compCoordMapData.size(); ++nodeIdx) { + compCoordMapData[nodeIdx] = compRowMapData[nodeIdx * dofsPerNode] / dofsPerNode; } compCoordMap = MapFactory::Build(compRowMap->lib(), compRowMap->getGlobalNumElements() / dofsPerNode, @@ -388,21 +385,20 @@ void MakeCoarseCompositeOperator(RCPgetIndexBase(), compRowMap->getComm()); - RCP > quasiRegCoordMap; - Array quasiRegCoordMapData(quasiRegRowMap->getLocalNumElements() / dofsPerNode); - ArrayView quasiRegRowMapData = quasiRegRowMap->getLocalElementList(); - for(size_type nodeIdx = 0; nodeIdx < quasiRegCoordMapData.size(); ++nodeIdx) { - quasiRegCoordMapData[nodeIdx] = quasiRegRowMapData[nodeIdx*dofsPerNode] / dofsPerNode; - } - quasiRegCoordMap = MapFactory::Build(quasiRegRowMap->lib(), - quasiRegRowMap->getGlobalNumElements() / 
dofsPerNode, - quasiRegCoordMapData(), - quasiRegRowMap->getIndexBase(), - quasiRegRowMap->getComm()); - regCoordImporter = ImportFactory::Build(compCoordMap, quasiRegCoordMap); + RCP> quasiRegCoordMap; + Array quasiRegCoordMapData(quasiRegRowMap->getLocalNumElements() / dofsPerNode); + ArrayView quasiRegRowMapData = quasiRegRowMap->getLocalElementList(); + for (size_type nodeIdx = 0; nodeIdx < quasiRegCoordMapData.size(); ++nodeIdx) { + quasiRegCoordMapData[nodeIdx] = quasiRegRowMapData[nodeIdx * dofsPerNode] / dofsPerNode; + } + quasiRegCoordMap = MapFactory::Build(quasiRegRowMap->lib(), + quasiRegRowMap->getGlobalNumElements() / dofsPerNode, + quasiRegCoordMapData(), + quasiRegRowMap->getIndexBase(), + quasiRegRowMap->getComm()); + regCoordImporter = ImportFactory::Build(compCoordMap, quasiRegCoordMap); } - compCoarseCoordinates = Xpetra::MultiVectorFactory - ::Build(compCoordMap, regCoarseCoordinates->getNumVectors()); + compCoarseCoordinates = Xpetra::MultiVectorFactory::Build(compCoordMap, regCoarseCoordinates->getNumVectors()); TEUCHOS_ASSERT(Teuchos::nonnull(compCoarseCoordinates)); // The following looks like regionalToComposite for Vector @@ -415,8 +411,7 @@ void MakeCoarseCompositeOperator(RCPreplaceMap(regCoordImporter->getTargetMap()); compCoarseCoordinates->doExport(*quasiRegCoarseCoordinates, *(regCoordImporter), Xpetra::INSERT); } -} // MakeCoarseCompositeOperator - +} // MakeCoarseCompositeOperator /* Create a direct solver for a composite operator * @@ -425,16 +420,15 @@ void MakeCoarseCompositeOperator(RCP -RCP, Tpetra::MultiVector > > -MakeCompositeDirectSolver(RCP >& compOp) -{ - using Tpetra_CrsMatrix = Tpetra::CrsMatrix; +template +RCP, Tpetra::MultiVector>> +MakeCompositeDirectSolver(RCP>& compOp) { + using Tpetra_CrsMatrix = Tpetra::CrsMatrix; using Tpetra_MultiVector = Tpetra::MultiVector; - using Utilities = MueLu::Utilities; + using Utilities = MueLu::Utilities; using Teuchos::TimeMonitor; - RCP > coarseSolver; + RCP> coarseSolver; { RCP 
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MakeCompositeDirectSolver: 1 - Setup"))); @@ -444,7 +438,7 @@ MakeCompositeDirectSolver(RCP(amesos2SolverName, tMat); + coarseSolver = Amesos2::create(amesos2SolverName, tMat); Teuchos::ParameterList amesos2_params("Amesos2"); amesos2_params.sublist(amesos2SolverName).set("IsContiguous", false, "Are GIDs Contiguous"); @@ -459,20 +453,19 @@ MakeCompositeDirectSolver(RCP +template void RebalanceCoarseCompositeOperator(const int rebalanceNumPartitions, - RCP >& coarseCompOp, - RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& compCoarseCoordinates, - RCP >& rebalancedCompOp, - RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& rebalancedCoordinates, - RCP >& rebalanceImporter) -{ + RCP>& coarseCompOp, + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node>>& compCoarseCoordinates, + RCP>& rebalancedCompOp, + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node>>& rebalancedCoordinates, + RCP>& rebalanceImporter) { #include "MueLu_UseShortNames.hpp" using CoordType = typename Teuchos::ScalarTraits::coordinateType; using Teuchos::TimeMonitor; @@ -496,21 +489,21 @@ void RebalanceCoarseCompositeOperator(const int rebalanceNumPartitions, RCP zoltan = rcp(new Zoltan2Interface()); - level.Set > ("A", coarseCompOp); - level.Set >("Coordinates", compCoarseCoordinates); -// int numPartitions = Get(level, "number of partitions"); + level.Set>("A", coarseCompOp); + level.Set>("Coordinates", compCoarseCoordinates); + // int numPartitions = Get(level, "number of partitions"); RCP repart = rcp(new RepartitionFactory()); Teuchos::ParameterList paramList; - paramList.set("repartition: remap parts", false); - if( numPartitions > 0 ){ // If number of coarse rebalance partitions was provided by the user. - level.Set ("number of partitions", numPartitions); + paramList.set("repartition: remap parts", false); + if (numPartitions > 0) { // If number of coarse rebalance partitions was provided by the user. 
+ level.Set("number of partitions", numPartitions); } else { Teuchos::ParameterList paramListHeuristic; - paramListHeuristic.set("repartition: start level", 1); + paramListHeuristic.set("repartition: start level", 1); RCP repartHeuristic = rcp(new RepartitionHeuristicFactory()); repartHeuristic->SetParameterList(paramListHeuristic); - repart->SetFactory("number of partitions", repartHeuristic ); + repart->SetFactory("number of partitions", repartHeuristic); } repart->SetParameterList(paramList); repart->SetFactory("Partition", zoltan); @@ -526,13 +519,13 @@ void RebalanceCoarseCompositeOperator(const int rebalanceNumPartitions, ParameterList XpetraList; XpetraList.set("Restrict Communicator", true); - XpetraList.set("Timer Label","MueLu::RebalanceAc-for-coarseAMG"); + XpetraList.set("Timer Label", "MueLu::RebalanceAc-for-coarseAMG"); // Build rebalanced coarse composite operator - rebalancedCompOp = MatrixFactory::Build(coarseCompOp, *rebalanceImporter, *rebalanceImporter, rebalanceImporter->getTargetMap(), rebalanceImporter->getTargetMap(), rcp(&XpetraList,false)); - if (!rebalancedCompOp.is_null()) { - rebalancedCompOp->SetFixedBlockSize(coarseCompOp->GetFixedBlockSize()); - } + rebalancedCompOp = MatrixFactory::Build(coarseCompOp, *rebalanceImporter, *rebalanceImporter, rebalanceImporter->getTargetMap(), rebalanceImporter->getTargetMap(), rcp(&XpetraList, false)); + if (!rebalancedCompOp.is_null()) { + rebalancedCompOp->SetFixedBlockSize(coarseCompOp->GetFixedBlockSize()); + } // Build rebalanced coarse coordinates (The following code is borrowed from MueLu_RebalanceTransferFactory_def.hpp) LO blkSize = coarseCompOp->GetFixedBlockSize(); @@ -544,24 +537,24 @@ void RebalanceCoarseCompositeOperator(const int rebalanceNumPartitions, // NOTE: there is an implicit assumption here: we assume that dof any node are enumerated consequently // Proper fix would require using decomposition similar to how we construct importer in the // RepartitionFactory - RCP origMap = 
compCoarseCoordinates->getMap(); - GO indexBase = origMap->getIndexBase(); + RCP origMap = compCoarseCoordinates->getMap(); + GO indexBase = origMap->getIndexBase(); - ArrayView OEntries = rebalanceImporter->getTargetMap()->getLocalElementList(); - LO numEntries = OEntries.size()/blkSize; + ArrayView OEntries = rebalanceImporter->getTargetMap()->getLocalElementList(); + LO numEntries = OEntries.size() / blkSize; ArrayRCP Entries(numEntries); for (LO i = 0; i < numEntries; i++) - Entries[i] = (OEntries[i*blkSize]-indexBase)/blkSize + indexBase; + Entries[i] = (OEntries[i * blkSize] - indexBase) / blkSize + indexBase; RCP targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), Entries(), indexBase, origMap->getComm()); - coordImporter = ImportFactory::Build(origMap, targetMap); + coordImporter = ImportFactory::Build(origMap, targetMap); } - rebalancedCoordinates = Xpetra::MultiVectorFactory::Build(coordImporter->getTargetMap(), compCoarseCoordinates->getNumVectors()); + rebalancedCoordinates = Xpetra::MultiVectorFactory::Build(coordImporter->getTargetMap(), compCoarseCoordinates->getNumVectors()); rebalancedCoordinates->doImport(*compCoarseCoordinates, *coordImporter, Xpetra::INSERT); rebalancedCoordinates->replaceMap(rebalancedCoordinates->getMap()->removeEmptyProcesses()); return; -} // RebalanceCoarseCompositeOperator +} // RebalanceCoarseCompositeOperator #endif /* Create an AMG hierarchy for a composite operator @@ -569,12 +562,11 @@ void RebalanceCoarseCompositeOperator(const int rebalanceNumPartitions, * Create the hierarchy object and perform the multigrid setup. * Finally, the hierarhcy object will be ready to be applied during the region MG V-cycle call. 
*/ -template -RCP > -MakeCompositeAMGHierarchy(RCP >& compOp, +template +RCP> +MakeCompositeAMGHierarchy(RCP>& compOp, const std::string& xmlFileName, - RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> > coordinates) -{ + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node>> coordinates) { #include "MueLu_UseShortNames.hpp" using coordinates_type = typename Teuchos::ScalarTraits::coordinateType; @@ -587,24 +579,23 @@ MakeCompositeAMGHierarchy(RCP mueluParams = Teuchos::rcp(new ParameterList()); Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, mueluParams.ptr(), - *compOp->getRowMap()->getComm()); - + *compOp->getRowMap()->getComm()); // Get the user data sublist - const std::string userName = "user data"; + const std::string userName = "user data"; Teuchos::ParameterList& userParamList = mueluParams->sublist(userName); // Add nullspace information { // Compute nullspace RCP nullspace; - if((compOp->GetFixedBlockSize() == 1) || Teuchos::is_null(coordinates)) { // Scalar problem, constant nullspace + if ((compOp->GetFixedBlockSize() == 1) || Teuchos::is_null(coordinates)) { // Scalar problem, constant nullspace nullspace = MultiVectorFactory::Build(compOp->getRowMap(), 1); nullspace->putScalar(one); - } else if(compOp->GetFixedBlockSize() == 2) { // 2D Elasticity + } else if (compOp->GetFixedBlockSize() == 2) { // 2D Elasticity nullspace = MultiVectorFactory::Build(compOp->getRowMap(), 3); - Array > nullspaceData(3); - Array > coordinateData(2); + Array> nullspaceData(3); + Array> coordinateData(2); // Calculate center const coordinates_type cx = coordinates->getVector(0)->meanValue(); @@ -613,24 +604,24 @@ MakeCompositeAMGHierarchy(RCPgetData(0); coordinateData[1] = coordinates->getData(1); - for(int vecIdx = 0; vecIdx < 3; ++vecIdx) { + for (int vecIdx = 0; vecIdx < 3; ++vecIdx) { nullspaceData[vecIdx] = nullspace->getDataNonConst(vecIdx); } - for(size_t nodeIdx = 0; nodeIdx < coordinates->getLocalLength(); ++nodeIdx) { + for (size_t nodeIdx 
= 0; nodeIdx < coordinates->getLocalLength(); ++nodeIdx) { // translations - nullspaceData[0][2*nodeIdx + 0] = one; - nullspaceData[1][2*nodeIdx + 1] = one; + nullspaceData[0][2 * nodeIdx + 0] = one; + nullspaceData[1][2 * nodeIdx + 1] = one; // rotation about z axis - nullspaceData[2][2*nodeIdx + 0] = -(coordinateData[1][nodeIdx] - cy); - nullspaceData[2][2*nodeIdx + 1] = (coordinateData[0][nodeIdx] - cx); + nullspaceData[2][2 * nodeIdx + 0] = -(coordinateData[1][nodeIdx] - cy); + nullspaceData[2][2 * nodeIdx + 1] = (coordinateData[0][nodeIdx] - cx); } - } else if(compOp->GetFixedBlockSize() == 3) { // 3D Elasticity + } else if (compOp->GetFixedBlockSize() == 3) { // 3D Elasticity nullspace = MultiVectorFactory::Build(compOp->getRowMap(), 6); - Array > nullspaceData(6); - Array > coordinateData(3); + Array> nullspaceData(6); + Array> coordinateData(3); // Calculate center const coordinates_type cx = coordinates->getVector(0)->meanValue(); @@ -641,27 +632,27 @@ MakeCompositeAMGHierarchy(RCPgetData(1); coordinateData[2] = coordinates->getData(2); - for(int vecIdx = 0; vecIdx < 6; ++vecIdx) { + for (int vecIdx = 0; vecIdx < 6; ++vecIdx) { nullspaceData[vecIdx] = nullspace->getDataNonConst(vecIdx); } - for(size_t nodeIdx = 0; nodeIdx < coordinates->getLocalLength(); ++nodeIdx) { + for (size_t nodeIdx = 0; nodeIdx < coordinates->getLocalLength(); ++nodeIdx) { // translations - nullspaceData[0][3*nodeIdx + 0] = one; - nullspaceData[1][3*nodeIdx + 1] = one; - nullspaceData[2][3*nodeIdx + 2] = one; + nullspaceData[0][3 * nodeIdx + 0] = one; + nullspaceData[1][3 * nodeIdx + 1] = one; + nullspaceData[2][3 * nodeIdx + 2] = one; // rotation about z axis - nullspaceData[3][3*nodeIdx + 0] = -(coordinateData[1][nodeIdx] - cy); - nullspaceData[3][3*nodeIdx + 1] = (coordinateData[0][nodeIdx] - cx); + nullspaceData[3][3 * nodeIdx + 0] = -(coordinateData[1][nodeIdx] - cy); + nullspaceData[3][3 * nodeIdx + 1] = (coordinateData[0][nodeIdx] - cx); // rotation about x axis - 
nullspaceData[4][3*nodeIdx + 1] = -(coordinateData[2][nodeIdx] - cz); - nullspaceData[4][3*nodeIdx + 2] = (coordinateData[1][nodeIdx] - cy); + nullspaceData[4][3 * nodeIdx + 1] = -(coordinateData[2][nodeIdx] - cz); + nullspaceData[4][3 * nodeIdx + 2] = (coordinateData[1][nodeIdx] - cy); // rotation about y axis - nullspaceData[5][3*nodeIdx + 0] = (coordinateData[2][nodeIdx] - cz); - nullspaceData[5][3*nodeIdx + 2] = -(coordinateData[0][nodeIdx] - cx); + nullspaceData[5][3 * nodeIdx + 0] = (coordinateData[2][nodeIdx] - cz); + nullspaceData[5][3 * nodeIdx + 2] = -(coordinateData[0][nodeIdx] - cx); } } @@ -681,7 +672,7 @@ MakeCompositeAMGHierarchy(RCPSetVerbLevel(MueLu::VERB_NONE); return compOpHiearchy; -} // MakeCompositeAMGHierarchy - +} // MakeCompositeAMGHierarchy - // Make interface scaling factors recursively -template +// Make interface scaling factors recursively +template void MakeInterfaceScalingFactors(const int numLevels, - Teuchos::RCP > regHierarchy) -{ + Teuchos::RCP> regHierarchy) { #include "Xpetra_UseShortNames.hpp" #include "MueLu_UseShortNames.hpp" const SC SC_ONE = Teuchos::ScalarTraits::one(); - TEUCHOS_TEST_FOR_EXCEPT_MSG(!(numLevels>0), "We require numLevel > 0. Probably, numLevel has not been set, yet."); + TEUCHOS_TEST_FOR_EXCEPT_MSG(!(numLevels > 0), "We require numLevel > 0. Probably, numLevel has not been set, yet."); for (int l = 0; l < numLevels; l++) { - RCP level = regHierarchy->GetLevel(l); - RCP regMat = level->Get >("A"); - RCP regRowMap = regMat->getRowMap(); - RCP > regRowImporters = level->Get > >("rowImport"); + RCP level = regHierarchy->GetLevel(l); + RCP regMat = level->Get>("A"); + RCP regRowMap = regMat->getRowMap(); + RCP> regRowImporters = level->Get>>("rowImport"); // initialize region vector with all ones. 
- RCP > regInterfaceScalings = VectorFactory::Build(regRowMap); + RCP> regInterfaceScalings = VectorFactory::Build(regRowMap); regInterfaceScalings->putScalar(SC_ONE); // transform to composite layout while adding interface values via the Export() combine mode - RCP compInterfaceScalingSum = VectorFactory::Build( regRowImporters->getSourceMap() , true); + RCP compInterfaceScalingSum = VectorFactory::Build(regRowImporters->getSourceMap(), true); regionalToComposite(regInterfaceScalings, compInterfaceScalingSum, regRowImporters); /* transform composite layout back to regional layout. Now, GIDs associated * with region interface should carry a scaling factor (!= 1). */ - RCP quasiRegInterfaceScaling; // Is that vector really needed? + RCP quasiRegInterfaceScaling; // Is that vector really needed? compositeToRegional(compInterfaceScalingSum, quasiRegInterfaceScaling, regInterfaceScalings, regRowMap, regRowImporters); - level->Set > >("regInterfaceScalings", regInterfaceScalings); + level->Set>>("regInterfaceScalings", regInterfaceScalings); } -} // MakeInterfaceScalingFactors +} // MakeInterfaceScalingFactors - -template +template void createRegionHierarchy(const int numDimensions, const Array lNodesPerDim, const std::string aggregationRegionType, RCP& interfaceParams, const int maxRegPerGID, RCP& coarseSolverData, - Array >& smootherParams, + Array>& smootherParams, RCP hierarchyData, - RCP > & regHierarchy, - const bool keepCoarseCoords) -{ + RCP>& regHierarchy, + const bool keepCoarseCoords) { #include "Xpetra_UseShortNames.hpp" #include "MueLu_UseShortNames.hpp" using Teuchos::TimeMonitor; // This monitor times everything and gets the overall setting cost RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("createRegionHierarchy"))); - using Hierarchy = MueLu::Hierarchy; - using Utilities = MueLu::Utilities; - using DirectCoarseSolver = Amesos2::Solver, Tpetra::MultiVector >; + using Hierarchy = MueLu::Hierarchy; + using Utilities = MueLu::Utilities; + using 
DirectCoarseSolver = Amesos2::Solver, Tpetra::MultiVector>; // std::cout << mapComp->getComm()->getRank() << " | Setting up MueLu hierarchies ..." << std::endl; int numLevels = 0; @@ -770,37 +757,39 @@ void createRegionHierarchy(const int numDimensions, // resize group containers on each level for (int l = 0; l < numLevels; ++l) { // Also doing some initialization in the smootherParams - if(l > 0) {smootherParams[l] = rcp(new Teuchos::ParameterList(*smootherParams[0]));} + if (l > 0) { + smootherParams[l] = rcp(new Teuchos::ParameterList(*smootherParams[0])); + } } } - RCP level0 = regHierarchy->GetLevel(0); - RCP regMat = level0->Get >("A"); - RCP revisedRowMap = regMat->getRowMap(); + RCP level0 = regHierarchy->GetLevel(0); + RCP regMat = level0->Get>("A"); + RCP revisedRowMap = regMat->getRowMap(); /* Get coarse level matrices and prolongators from MueLu hierarchy * Note: fine level has been dealt with previously, so we start at level 1 here. */ - using real_type = typename Teuchos::ScalarTraits::coordinateType; + using real_type = typename Teuchos::ScalarTraits::coordinateType; using realvaluedmultivector_type = Xpetra::MultiVector; RCP regCoarseCoordinates; - for (int l = 1; l < numLevels; ++l) { // Note: we start at level 1 (which is the first coarse level) + for (int l = 1; l < numLevels; ++l) { // Note: we start at level 1 (which is the first coarse level) RCP level = regHierarchy->GetLevel(l); - if(keepCoarseCoords && (l == numLevels - 1)) { - regCoarseCoordinates = level->Get >("Coordinates2", MueLu::NoFactory::get()); + if (keepCoarseCoords && (l == numLevels - 1)) { + regCoarseCoordinates = level->Get>("Coordinates2", MueLu::NoFactory::get()); } - RCP regMatrices = level->Get >("A", MueLu::NoFactory::get()); + RCP regMatrices = level->Get>("A", MueLu::NoFactory::get()); // Create residual and solution vectors and cache them for vCycle apply std::string levelName("level"); levelName += std::to_string(l); ParameterList& levelList = 
hierarchyData->sublist(levelName, false, "list of data on current level"); - RCP regRes = VectorFactory::Build(revisedRowMap, true); - RCP regSol = VectorFactory::Build(revisedRowMap, true); + RCP regRes = VectorFactory::Build(revisedRowMap, true); + RCP regSol = VectorFactory::Build(revisedRowMap, true); - levelList.set >("residual", regRes, "Cached residual vector"); - levelList.set >("solution", regSol, "Cached solution vector"); + levelList.set>("residual", regRes, "Cached residual vector"); + levelList.set>("solution", regSol, "Cached solution vector"); } // std::cout << mapComp->getComm()->getRank() << " | MakeCoarseLevelMaps ..." << std::endl; @@ -824,12 +813,12 @@ void createRegionHierarchy(const int numDimensions, // Only set the smoother up to the last but one level // if we want to use a smoother on the coarse level // we will handle that separately with "coarse solver type" - for(int levelIdx = 0; levelIdx < numLevels - 1; ++levelIdx) { - RCP level = regHierarchy->GetLevel(levelIdx); - RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP regRowMap = regMatrix->getRowMap(); - RCP > regRowImporter = level->Get > >("rowImport"); - RCP > regInterfaceScalings = level->Get > >("regInterfaceScalings"); + for (int levelIdx = 0; levelIdx < numLevels - 1; ++levelIdx) { + RCP level = regHierarchy->GetLevel(levelIdx); + RCP regMatrix = level->Get>("A", MueLu::NoFactory::get()); + RCP regRowMap = regMatrix->getRowMap(); + RCP> regRowImporter = level->Get>>("rowImport"); + RCP> regInterfaceScalings = level->Get>>("regInterfaceScalings"); smootherParams[levelIdx]->set("smoother: level", levelIdx); smootherSetup(smootherParams[levelIdx], regRowMap, @@ -839,40 +828,39 @@ void createRegionHierarchy(const int numDimensions, // std::cout << mapComp->getComm()->getRank() << " | CreateCoarseSolver ..." 
<< std::endl; - tmLocal = Teuchos::null; - tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("createRegionHierarchy: CreateCoarseSolver"))); + tmLocal = Teuchos::null; + tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("createRegionHierarchy: CreateCoarseSolver"))); const std::string coarseSolverType = coarseSolverData->get("coarse solver type"); if (coarseSolverType == "smoother") { - RCP level = regHierarchy->GetLevel(numLevels - 1); - RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP regRowMap = regMatrix->getRowMap(); - RCP regRowImporter = level->Get >("rowImport"); - RCP regInterfaceScalings = level->Get >("regInterfaceScalings"); + RCP level = regHierarchy->GetLevel(numLevels - 1); + RCP regMatrix = level->Get>("A", MueLu::NoFactory::get()); + RCP regRowMap = regMatrix->getRowMap(); + RCP regRowImporter = level->Get>("rowImport"); + RCP regInterfaceScalings = level->Get>("regInterfaceScalings"); // Set the smoother on the coarsest level. - const std::string smootherXMLFileName = coarseSolverData->get("smoother xml file"); + const std::string smootherXMLFileName = coarseSolverData->get("smoother xml file"); RCP coarseSmootherParams = smootherParams[numLevels - 1]; Teuchos::updateParametersFromXmlFileAndBroadcast(smootherXMLFileName, coarseSmootherParams.ptr(), *level->GetComm()); coarseSmootherParams->set("smoother: level", numLevels - 1); coarseSmootherParams->print(); - smootherSetup(smootherParams[numLevels - 1], regRowMap, regMatrix, regInterfaceScalings, regRowImporter); - } else if( (coarseSolverType == "direct") || (coarseSolverType == "amg") ) { + } else if ((coarseSolverType == "direct") || (coarseSolverType == "amg")) { // A composite coarse matrix is needed // std::cout << mapComp->getComm()->getRank() << " | MakeCoarseCompositeOperator ..." 
<< std::endl; - RCP level = regHierarchy->GetLevel(numLevels - 1); - RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP regRowImporter = level->Get >("rowImport"); - RCP compRowMap = regRowImporter->getSourceMap(); - RCP quasiRegRowMap = regRowImporter->getTargetMap(); - RCP quasiRegColMap = regRowImporter->getTargetMap();// col map same as row map. + RCP level = regHierarchy->GetLevel(numLevels - 1); + RCP regMatrix = level->Get>("A", MueLu::NoFactory::get()); + RCP regRowImporter = level->Get>("rowImport"); + RCP compRowMap = regRowImporter->getSourceMap(); + RCP quasiRegRowMap = regRowImporter->getTargetMap(); + RCP quasiRegColMap = regRowImporter->getTargetMap(); // col map same as row map. - RCP > coarseCompOp; + RCP> coarseCompOp; RCP compCoarseCoordinates; MakeCoarseCompositeOperator(compRowMap, quasiRegRowMap, @@ -884,14 +872,14 @@ void createRegionHierarchy(const int numDimensions, compCoarseCoordinates, keepCoarseCoords); - coarseSolverData->set >("compCoarseRowMap", coarseCompOp->getRowMap()); + coarseSolverData->set>("compCoarseRowMap", coarseCompOp->getRowMap()); // std::cout << mapComp->getComm()->getRank() << " | MakeCoarseCompositeSolver ..." << std::endl; if (coarseSolverType == "direct") { RCP coarseDirectSolver = MakeCompositeDirectSolver(coarseCompOp); - coarseSolverData->set >("direct solver object", coarseDirectSolver); + coarseSolverData->set>("direct solver object", coarseDirectSolver); } else if (coarseSolverType == "amg") { - if(keepCoarseCoords == false) { + if (keepCoarseCoords == false) { std::cout << "WARNING: you requested a coarse AMG solver but you did not request coarse coordinates to be kept, repartitioning is not possible!" 
<< std::endl; } @@ -899,21 +887,21 @@ void createRegionHierarchy(const int numDimensions, std::string amgXmlFileName = coarseSolverData->get("amg xml file"); #if defined(HAVE_MUELU_ZOLTAN2) && defined(HAVE_MPI) const bool coarseSolverRebalance = coarseSolverData->get("coarse solver rebalance"); - if(keepCoarseCoords == true && coarseSolverRebalance == true ){ - RCP > rebalancedCompOp; - RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node> > rebalancedCoordinates; + if (keepCoarseCoords == true && coarseSolverRebalance == true) { + RCP> rebalancedCompOp; + RCP::coordinateType, LocalOrdinal, GlobalOrdinal, Node>> rebalancedCoordinates; RCP rebalanceImporter; const int rebalanceNumPartitions = coarseSolverData->get("coarse rebalance num partitions"); RebalanceCoarseCompositeOperator(rebalanceNumPartitions, - coarseCompOp, - compCoarseCoordinates, - rebalancedCompOp, - rebalancedCoordinates, - rebalanceImporter); - coarseSolverData->set >("rebalanceImporter", rebalanceImporter); - - if( !rebalancedCompOp.is_null() ) + coarseCompOp, + compCoarseCoordinates, + rebalancedCompOp, + rebalancedCoordinates, + rebalanceImporter); + coarseSolverData->set>("rebalanceImporter", rebalanceImporter); + + if (!rebalancedCompOp.is_null()) coarseAMGHierarchy = MakeCompositeAMGHierarchy(rebalancedCompOp, amgXmlFileName, rebalancedCoordinates); } else { @@ -922,12 +910,12 @@ void createRegionHierarchy(const int numDimensions, #else coarseAMGHierarchy = MakeCompositeAMGHierarchy(coarseCompOp, amgXmlFileName, compCoarseCoordinates); #endif - coarseSolverData->set >("amg hierarchy object", coarseAMGHierarchy); + coarseSolverData->set>("amg hierarchy object", coarseAMGHierarchy); } - } else { + } else { TEUCHOS_TEST_FOR_EXCEPT_MSG(false, "Unknown coarse solver type."); } -} // createRegionHierarchy +} // createRegionHierarchy -#endif // MUELU_SETUPREGIONHIERARCHY_DEF_HPP +#endif // MUELU_SETUPREGIONHIERARCHY_DEF_HPP diff --git 
a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp index 73650a565d6f..587607edc716 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp @@ -48,8 +48,8 @@ #include #include -#define RegionsSpanProcs 1 -#define MultipleRegionsPerProc 2 +#define RegionsSpanProcs 1 +#define MultipleRegionsPerProc 2 #include #include @@ -62,20 +62,18 @@ #include #include -using Teuchos::RCP; -using Teuchos::ArrayRCP; using Teuchos::Array; +using Teuchos::ArrayRCP; +using Teuchos::RCP; /*! \brief Find common regions of two nodes * */ -template -Teuchos::Array findCommonRegions(const GlobalOrdinal nodeA, ///< GID of first node - const GlobalOrdinal nodeB, ///< GID of second node - const Array > nodesToRegions, ///< mapping of nodes to regions - RCP > nodesToRegionsMap - ) -{ +template +Teuchos::Array findCommonRegions(const GlobalOrdinal nodeA, ///< GID of first node + const GlobalOrdinal nodeB, ///< GID of second node + const Array> nodesToRegions, ///< mapping of nodes to regions + RCP> nodesToRegionsMap) { #include "Xpetra_UseShortNamesOrdinal.hpp" using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("findCommonRegions: 1 - Extract regions"))); @@ -103,7 +101,7 @@ Teuchos::Array findCommonRegions(const GlobalOrdinal nodeA, ///< GID of fir tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("findCommonRegions: 3 - Find commons"))); std::vector::iterator it = std::set_intersection(regionsA.begin(), - regionsA.end(), regionsB.begin(), regionsB.end(), commonRegions.begin()); + regionsA.end(), regionsB.begin(), regionsB.end(), commonRegions.begin()); commonRegions.resize(it - commonRegions.begin()); tm = Teuchos::null; @@ -121,21 +119,20 @@ Teuchos::Array findCommonRegions(const GlobalOrdinal nodeA, ///< GID of fir return finalCommonRegions; } - template -void MakeQuasiregionMatrices(const RCP 
> AComp, - RCP > regionsPerGIDWithGhosts, - RCP > rowMap, - RCP > colMap, - RCP >& rowImport, - RCP >& quasiRegionMats, +void MakeQuasiregionMatrices(const RCP> AComp, + RCP> regionsPerGIDWithGhosts, + RCP> rowMap, + RCP> colMap, + RCP>& rowImport, + RCP>& quasiRegionMats, const Teuchos::ArrayRCP& regionMatVecLIDs) { #include "Xpetra_UseShortNames.hpp" using Teuchos::RCP; using Teuchos::TimeMonitor; - Array > regionPerGIDWithGhostsData(regionsPerGIDWithGhosts->getNumVectors()); - for(size_t vecIdx = 0; vecIdx < regionsPerGIDWithGhosts->getNumVectors(); ++vecIdx) { + Array> regionPerGIDWithGhostsData(regionsPerGIDWithGhosts->getNumVectors()); + for (size_t vecIdx = 0; vecIdx < regionsPerGIDWithGhosts->getNumVectors(); ++vecIdx) { regionPerGIDWithGhostsData[vecIdx] = regionsPerGIDWithGhosts->getData(vecIdx); } @@ -166,12 +163,12 @@ void MakeQuasiregionMatrices(const RCP quasiRegionCrsWrap = Teuchos::rcp_dynamic_cast(quasiRegionMats); - RCP quasiRegionCrs = quasiRegionCrsWrap->getCrsMatrix(); + RCP quasiRegionCrs = quasiRegionCrsWrap->getCrsMatrix(); // Grab first and last element of sorted interface LIDs Array interfaceLIDs(regionMatVecLIDs()); std::sort(interfaceLIDs.begin(), interfaceLIDs.end()); - auto vecEnd = std::unique(interfaceLIDs.begin(), interfaceLIDs.end()); + auto vecEnd = std::unique(interfaceLIDs.begin(), interfaceLIDs.end()); auto vecStart = interfaceLIDs.begin(); GO rowGID; @@ -179,18 +176,18 @@ void MakeQuasiregionMatrices(const RCPgetGlobalElement(*row); - numEntries = quasiRegionMats->getNumEntriesInLocalRow(*row); // number of entries in this row - Array values(numEntries); // non-zeros in this row - Array colInds(numEntries); // local column indices + for (auto row = vecStart; row < vecEnd; ++row) { + rowGID = rowMap->getGlobalElement(*row); + numEntries = quasiRegionMats->getNumEntriesInLocalRow(*row); // number of entries in this row + Array values(numEntries); // non-zeros in this row + Array colInds(numEntries); // local column indices 
quasiRegionMats->getLocalRowCopy(*row, colInds, values, numEntries); - for (std::size_t entryIdx = 0; entryIdx < numEntries; ++entryIdx) { // loop over all entries in this row - col = colInds[entryIdx]; + for (std::size_t entryIdx = 0; entryIdx < numEntries; ++entryIdx) { // loop over all entries in this row + col = colInds[entryIdx]; colGID = colMap->getGlobalElement(col); Array commonRegions; - if (rowGID != colGID) { // Skip the diagonal entry. It will be processed later. + if (rowGID != colGID) { // Skip the diagonal entry. It will be processed later. commonRegions = findCommonRegions(rowGID, colGID, regionPerGIDWithGhostsData, regionsPerGIDWithGhosts->getMap()); } @@ -208,18 +205,17 @@ void MakeQuasiregionMatrices(const RCPfillComplete(); tm = Teuchos::null; -} // MakeQuasiregionMatrices - +} // MakeQuasiregionMatrices template -void MakeRegionMatrices(const RCP > AComp, - const RCP > mapComp, - RCP > rowMap, - RCP > revisedRowMap, - RCP > revisedColMap, - RCP >& rowImport, - RCP >& quasiRegionMats, - RCP >& regionMats) { +void MakeRegionMatrices(const RCP> AComp, + const RCP> mapComp, + RCP> rowMap, + RCP> revisedRowMap, + RCP> revisedColMap, + RCP>& rowImport, + RCP>& quasiRegionMats, + RCP>& regionMats) { #include "Xpetra_UseShortNames.hpp" using Teuchos::RCP; using Teuchos::TimeMonitor; @@ -230,40 +226,40 @@ void MakeRegionMatrices(const RCP regionCrsMat = Teuchos::rcp_dynamic_cast(regionMats)->getCrsMatrix(); + // Extract current region CrsMatrix + RCP regionCrsMat = Teuchos::rcp_dynamic_cast(regionMats)->getCrsMatrix(); - // Extract current quasi-region CrsMatrix - RCP quasiRegionCrsMat = Teuchos::rcp_dynamic_cast(quasiRegionMats)->getCrsMatrix(); + // Extract current quasi-region CrsMatrix + RCP quasiRegionCrsMat = Teuchos::rcp_dynamic_cast(quasiRegionMats)->getCrsMatrix(); - // Pull out the data from the quasi-region CrsMatrix - ArrayRCP rowptrQuasiRegion; - ArrayRCP colindQuasiRegion; - ArrayRCP valuesQuasiRegion; - 
quasiRegionCrsMat->getAllValues(rowptrQuasiRegion, colindQuasiRegion, valuesQuasiRegion); + // Pull out the data from the quasi-region CrsMatrix + ArrayRCP rowptrQuasiRegion; + ArrayRCP colindQuasiRegion; + ArrayRCP valuesQuasiRegion; + quasiRegionCrsMat->getAllValues(rowptrQuasiRegion, colindQuasiRegion, valuesQuasiRegion); - // Do a deep copy of values - // (at least we've been doing deep copies so far, maybe we could do shallow copies to save time?) - ArrayRCP rowptrRegion(rowptrQuasiRegion.size()); - ArrayRCP colindRegion(colindQuasiRegion.size()); - ArrayRCP valuesRegion(valuesQuasiRegion.size()); + // Do a deep copy of values + // (at least we've been doing deep copies so far, maybe we could do shallow copies to save time?) + ArrayRCP rowptrRegion(rowptrQuasiRegion.size()); + ArrayRCP colindRegion(colindQuasiRegion.size()); + ArrayRCP valuesRegion(valuesQuasiRegion.size()); - regionCrsMat->allocateAllValues(valuesRegion.size(), rowptrRegion, colindRegion, valuesRegion); + regionCrsMat->allocateAllValues(valuesRegion.size(), rowptrRegion, colindRegion, valuesRegion); - for(LocalOrdinal idx = 0; idx < static_cast(rowptrRegion.size()); ++idx) { - rowptrRegion[idx] = rowptrQuasiRegion[idx]; - } + for (LocalOrdinal idx = 0; idx < static_cast(rowptrRegion.size()); ++idx) { + rowptrRegion[idx] = rowptrQuasiRegion[idx]; + } - for(LocalOrdinal idx = 0; idx < static_cast(colindRegion.size()); ++idx) { - colindRegion[idx] = colindQuasiRegion[idx]; - valuesRegion[idx] = valuesQuasiRegion[idx]; - } + for (LocalOrdinal idx = 0; idx < static_cast(colindRegion.size()); ++idx) { + colindRegion[idx] = colindQuasiRegion[idx]; + valuesRegion[idx] = valuesQuasiRegion[idx]; + } - // Set and fillComplete the region CrsMatrix - regionCrsMat->setAllValues(rowptrRegion, colindRegion, valuesRegion); - regionCrsMat->expertStaticFillComplete(revisedRowMap, revisedRowMap); + // Set and fillComplete the region CrsMatrix + regionCrsMat->setAllValues(rowptrRegion, colindRegion, 
valuesRegion); + regionCrsMat->expertStaticFillComplete(revisedRowMap, revisedRowMap); } tm = Teuchos::null; @@ -280,7 +276,7 @@ void MakeRegionMatrices(const RCP quasiRegNspViolation = VectorFactory::Build(rowMap, true); - regNspViolation = VectorFactory::Build(revisedRowMap, true); + regNspViolation = VectorFactory::Build(revisedRowMap, true); compositeToRegional(nspViolation, quasiRegNspViolation, regNspViolation, revisedRowMap, rowImport); @@ -328,9 +324,8 @@ void MakeRegionMatrices(const RCPapply(*regNsp, *regCorrection); regionMats->SetFixedBlockSize(AComp->GetFixedBlockSize()); - RCP regDiag = Teuchos::null; - regDiag = VectorFactory::Build(revisedRowMap, true); + regDiag = VectorFactory::Build(revisedRowMap, true); regionMats->getLocalDiagCopy(*regDiag); regDiag->update(-SC_ONE, *regCorrection, SC_ONE, *regNspViolation, SC_ONE); @@ -339,8 +334,7 @@ void MakeRegionMatrices(const RCPreplaceDiag(*regDiag); tm = Teuchos::null; -} // MakeRegionMatrices - +} // MakeRegionMatrices /*! \brief Transform regional matrix to composite layout * @@ -356,18 +350,17 @@ void MakeRegionMatrices(const RCP -void regionalToComposite(const RCP >& regMat, ///< Matrix in region layout [in] - const RCP > rowMap, ///< row maps in quasiRegion layout [in] - const RCP > colMap, ///< col maps in quasiRegion layout [in] - const RCP > rowImport, ///< row importer in region layout [in] - const Xpetra::CombineMode combineMode, ///< Combine mode for import/export [in] - RCP >& compMat ///< Matrix in composite layout [in/out] - ) -{ +void regionalToComposite(const RCP>& regMat, ///< Matrix in region layout [in] + const RCP> rowMap, ///< row maps in quasiRegion layout [in] + const RCP> colMap, ///< col maps in quasiRegion layout [in] + const RCP> rowImport, ///< row importer in region layout [in] + const Xpetra::CombineMode combineMode, ///< Combine mode for import/export [in] + RCP>& compMat ///< Matrix in composite layout [in/out] +) { #include "Xpetra_UseShortNames.hpp" - using 
Teuchos::TimeMonitor; - using Teuchos::rcp; using std::size_t; + using Teuchos::rcp; + using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: Matrix"))); @@ -386,8 +379,8 @@ void regionalToComposite(const RCP quasiRegMat; { quasiRegMat = rcp(new CrsMatrixWrap(rowMap, - colMap, - regMat->getCrsGraph()->getLocalMaxNumRowEntries())); + colMap, + regMat->getCrsGraph()->getLocalMaxNumRowEntries())); // Extract current quasi-region CrsMatrix RCP quasiRegionCrsMat = Teuchos::rcp_dynamic_cast(quasiRegMat)->getCrsMatrix(); @@ -409,11 +402,11 @@ void regionalToComposite(const RCPallocateAllValues(valuesQuasiRegion.size(), rowptrQuasiRegion, colindQuasiRegion, valuesQuasiRegion); - for(LocalOrdinal idx = 0; idx < static_cast(rowptrQuasiRegion.size()); ++idx) { + for (LocalOrdinal idx = 0; idx < static_cast(rowptrQuasiRegion.size()); ++idx) { rowptrQuasiRegion[idx] = rowptrRegion[idx]; } - for(LocalOrdinal idx = 0; idx < static_cast(colindQuasiRegion.size()); ++idx) { + for (LocalOrdinal idx = 0; idx < static_cast(colindQuasiRegion.size()); ++idx) { colindQuasiRegion[idx] = colindRegion[idx]; valuesQuasiRegion[idx] = valuesRegion[idx]; } @@ -426,7 +419,7 @@ void regionalToComposite(const RCP partialCompMat; partialCompMat = MatrixFactory::Build(compMat->getRowMap(), - 8*regMat->getCrsGraph()->getLocalMaxNumRowEntries()); + 8 * regMat->getCrsGraph()->getLocalMaxNumRowEntries()); partialCompMat->doExport(*(quasiRegMat), *(rowImport), Xpetra::INSERT); partialCompMat->fillComplete(); @@ -436,29 +429,28 @@ void regionalToComposite(const RCPfillComplete(); return; -} // regionalToComposite - +} // regionalToComposite /*! 
\brief Compute local data needed to perform a MatVec in region format */ template -void SetupMatVec(const Teuchos::RCP >& interfaceGIDsMV, - const Teuchos::RCP >& regionsPerGIDWithGhosts, - const Teuchos::RCP >& regionRowMap, - const Teuchos::RCP >& rowImport, +void SetupMatVec(const Teuchos::RCP>& interfaceGIDsMV, + const Teuchos::RCP>& regionsPerGIDWithGhosts, + const Teuchos::RCP>& regionRowMap, + const Teuchos::RCP>& rowImport, Teuchos::ArrayRCP& regionMatVecLIDs, - Teuchos::RCP >& regionInterfaceImporter) { + Teuchos::RCP>& regionInterfaceImporter) { #include "Xpetra_UseShortNamesOrdinal.hpp" using Teuchos::TimeMonitor; RCP tm; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("SetupMatVec: 1 - sumInterfaceValues"))); - const LO maxRegPerGID = static_cast(regionsPerGIDWithGhosts->getNumVectors()); - const int myRank = regionRowMap->getComm()->getRank(); + const LO maxRegPerGID = static_cast(regionsPerGIDWithGhosts->getNumVectors()); + const int myRank = regionRowMap->getComm()->getRank(); interfaceGIDsMV->replaceMap(regionRowMap); - RCP > interfaceGIDs; + RCP> interfaceGIDs; interfaceGIDs = interfaceGIDsMV; sumInterfaceValues(interfaceGIDs, regionRowMap, rowImport); @@ -467,14 +459,13 @@ void SetupMatVec(const Teuchos::RCP regionMatVecLIDstmp; Teuchos::Array regionMatVecGIDs; - Array > regionsPerGIDWithGhostsData(maxRegPerGID); - Array > interfaceGIDsData(maxRegPerGID); - for(LO regionIdx = 0; regionIdx < maxRegPerGID; ++regionIdx) { + Array> regionsPerGIDWithGhostsData(maxRegPerGID); + Array> interfaceGIDsData(maxRegPerGID); + for (LO regionIdx = 0; regionIdx < maxRegPerGID; ++regionIdx) { regionsPerGIDWithGhostsData[regionIdx] = regionsPerGIDWithGhosts->getData(regionIdx); - interfaceGIDsData[regionIdx] = interfaceGIDs->getData(regionIdx); - for(LO idx = 0; idx < static_cast(regionsPerGIDWithGhostsData[regionIdx].size()); ++idx) { - if((regionsPerGIDWithGhostsData[regionIdx][idx] != -1) - && (regionsPerGIDWithGhostsData[regionIdx][idx] != myRank)) { + 
interfaceGIDsData[regionIdx] = interfaceGIDs->getData(regionIdx); + for (LO idx = 0; idx < static_cast(regionsPerGIDWithGhostsData[regionIdx].size()); ++idx) { + if ((regionsPerGIDWithGhostsData[regionIdx][idx] != -1) && (regionsPerGIDWithGhostsData[regionIdx][idx] != myRank)) { regionMatVecLIDstmp.push_back(idx); regionMatVecGIDs.push_back(interfaceGIDsData[regionIdx][idx]); } @@ -485,11 +476,11 @@ void SetupMatVec(const Teuchos::RCP regionInterfaceMap = Xpetra::MapFactory::Build(regionRowMap->lib(), - Teuchos::OrdinalTraits::invalid(), - regionMatVecGIDs(), - regionRowMap->getIndexBase(), - regionRowMap->getComm()); + RCP regionInterfaceMap = Xpetra::MapFactory::Build(regionRowMap->lib(), + Teuchos::OrdinalTraits::invalid(), + regionMatVecGIDs(), + regionRowMap->getIndexBase(), + regionRowMap->getComm()); tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("SetupMatVec: 3 - Build importer"))); @@ -497,7 +488,7 @@ void SetupMatVec(const Teuchos::RCP -void -computeResidual(RCP >& regRes, ///< residual (to be evaluated) - const RCP > regX, ///< left-hand side (solution) - const RCP > regB, ///< right-hand side (forcing term) - const RCP > regionMats, ///< matrix in region format - const Teuchos::ParameterList& params ///< parameter with fast MatVec parameters and pre-computed communication patterns - ) -{ +void computeResidual(RCP>& regRes, ///< residual (to be evaluated) + const RCP> regX, ///< left-hand side (solution) + const RCP> regB, ///< right-hand side (forcing term) + const RCP> regionMats, ///< matrix in region format + const Teuchos::ParameterList& params ///< parameter with fast MatVec parameters and pre-computed communication patterns +) { #include "Xpetra_UseShortNames.hpp" using TST = Teuchos::ScalarTraits; using Teuchos::TimeMonitor; @@ -522,7 +511,7 @@ computeResidual(RCP >& // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = params.get>("Fast MatVec: interface LIDs"); - const 
RCP regionInterfaceImporter = params.get>("Fast MatVec: interface importer"); + const RCP regionInterfaceImporter = params.get>("Fast MatVec: interface importer"); // Step 1: Compute region version of y = Ax and store it in regRes regionMats->apply(*regX, *regRes, Teuchos::NO_TRANS, TST::one(), TST::zero(), true, regionInterfaceImporter, regionInterfaceLIDs); @@ -531,6 +520,6 @@ computeResidual(RCP >& regRes->update(TST::one(), *regB, -TST::one(), *regRes, TST::zero()); tm = Teuchos::null; -} // computeResidual +} // computeResidual -#endif // MUELU_SETUPREGIONMATRIX_DEF_HPP +#endif // MUELU_SETUPREGIONMATRIX_DEF_HPP diff --git a/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp index 079feb75c5a9..563f337d2787 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp @@ -59,15 +59,14 @@ #include #include - #include "SetupRegionMatrix_def.hpp" #include "SetupRegionVector_def.hpp" -using Teuchos::RCP; -using Teuchos::ArrayRCP; using Teuchos::Array; +using Teuchos::ArrayRCP; using Teuchos::ArrayView; using Teuchos::ParameterList; +using Teuchos::RCP; /*! \brief Create list of valid smoother types * @@ -76,13 +75,12 @@ using Teuchos::ParameterList; * * ToDo: replace this list by an enum when we migrate to actual code. 
*/ -std::map getListOfValidSmootherTypes() -{ +std::map getListOfValidSmootherTypes() { std::map smootherTypes; - smootherTypes.insert(std::pair("None", 0)); - smootherTypes.insert(std::pair("Jacobi", 1)); - smootherTypes.insert(std::pair("Gauss", 2)); - smootherTypes.insert(std::pair("SymmetricGauss", 3)); + smootherTypes.insert(std::pair("None", 0)); + smootherTypes.insert(std::pair("Jacobi", 1)); + smootherTypes.insert(std::pair("Gauss", 2)); + smootherTypes.insert(std::pair("SymmetricGauss", 3)); smootherTypes.insert(std::pair("Chebyshev", 4)); return smootherTypes; @@ -92,11 +90,11 @@ std::map getListOfValidSmootherTypes() * * Computes the inverse of the diagonal in region format and with interface scaling */ -template +template void computeInverseDiagonal(RCP params, - const RCP > revisedRowMap, - const RCP > regionMats, - const RCP > rowImport) ///< row importer in region layout [in] + const RCP > revisedRowMap, + const RCP > regionMats, + const RCP > rowImport) ///< row importer in region layout [in] { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; @@ -115,22 +113,19 @@ void computeInverseDiagonal(RCP params, params->set >("smoothers: inverse diagonal", diagReg); } - /*! \brief Do Jacobi smoothing * * Perform Jacobi smoothing in the region layout using the true diagonal value * recovered from the splitted matrix. 
*/ -template +template void jacobiIterate(RCP smootherParams, - RCP >& regX, // left-hand side (or solution) - const RCP > regB, // right-hand side (or residual) - const RCP > regionMats, // matrices in true region layout - const RCP > revisedRowMap, ///< revised row maps in region layout [in] (actually extracted from regionMats) - const RCP > rowImport,///< row importer in region layout [in] - bool& zeroInitGuess - ) -{ + RCP >& regX, // left-hand side (or solution) + const RCP > regB, // right-hand side (or residual) + const RCP > regionMats, // matrices in true region layout + const RCP > revisedRowMap, ///< revised row maps in region layout [in] (actually extracted from regionMats) + const RCP > rowImport, ///< row importer in region layout [in] + bool& zeroInitGuess) { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Region Jacobi Iterate"))); @@ -138,19 +133,17 @@ void jacobiIterate(RCP smootherParams, // const Scalar SC_ZERO = Teuchos::ScalarTraits::zero(); const Scalar SC_ONE = Teuchos::ScalarTraits::one(); - const int maxIter = smootherParams->get ("smoother: sweeps"); + const int maxIter = smootherParams->get("smoother: sweeps"); const double damping = smootherParams->get("smoother: damping"); RCP diag_inv = smootherParams->get >("smoothers: inverse diagonal"); RCP regRes = VectorFactory::Build(revisedRowMap, true); for (int iter = 0; iter < maxIter; ++iter) { - // Update the residual vector if (zeroInitGuess) { regX->elementWiseMultiply(damping, *diag_inv, *regB, SC_ONE); - } - else { + } else { computeResidual(regRes, regX, regB, regionMats, *smootherParams); // update solution according to Jacobi's method @@ -160,8 +153,7 @@ void jacobiIterate(RCP smootherParams, } return; -} // jacobiIterate - +} // jacobiIterate /*! \brief Do Gauss-Seidel smoothing * @@ -169,23 +161,21 @@ void jacobiIterate(RCP smootherParams, * recovered from the splitted matrix. 
Off-diagonal values are just taken as they are * in region format. */ -template +template void GSIterate(RCP smootherParams, - RCP >& regX, // left-hand side (or solution) - const RCP > regB, // right-hand side (or residual) - const RCP > regionMats, // matrices in true region layout - const RCP > revisedRowMap, ///< revised row maps in region layout [in] (actually extracted from regionMats) - const RCP > rowImport,///< row importer in region layout [in] + RCP >& regX, // left-hand side (or solution) + const RCP > regB, // right-hand side (or residual) + const RCP > regionMats, // matrices in true region layout + const RCP > revisedRowMap, ///< revised row maps in region layout [in] (actually extracted from regionMats) + const RCP > rowImport, ///< row importer in region layout [in] bool& zeroInitGuess, - bool sgs = false - ) -{ + bool sgs = false) { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Region Gauss-Seidel Iterate"))); // Extract user-given and pre-computed data from paremter list - const int maxIter = smootherParams->get("smoother: sweeps"); + const int maxIter = smootherParams->get("smoother: sweeps"); const double damping = smootherParams->get("smoother: damping"); RCP diag_inv = smootherParams->get >("smoothers: inverse diagonal"); @@ -195,21 +185,19 @@ void GSIterate(RCP smootherParams, // GS iteration loop for (int iter = 0; iter < maxIter; ++iter) { - // Update the residual vector - if (!zeroInitGuess) - { + if (!zeroInitGuess) { computeResidual(regRes, regX, regB, regionMats, *smootherParams); } // update the solution and the residual - using MT = typename Teuchos::ScalarTraits::magnitudeType; - RCP delta = VectorFactory::Build(regionMats->getRowMap(), true); - ArrayRCP ldelta = delta->getDataNonConst(0); - ArrayRCP OneregX = regX->getDataNonConst(0); + using MT = typename Teuchos::ScalarTraits::magnitudeType; + RCP delta = VectorFactory::Build(regionMats->getRowMap(), true); 
+ ArrayRCP ldelta = delta->getDataNonConst(0); + ArrayRCP OneregX = regX->getDataNonConst(0); ArrayRCP OneregRes = regRes->getDataNonConst(0); - if (zeroInitGuess) { // copy regB to regRes + if (zeroInitGuess) { // copy regB to regRes ArrayRCP rhs = regB->getDataNonConst(0); for (size_t k = 0; k < numRows; ++k) OneregRes[k] = rhs[k]; } @@ -221,58 +209,55 @@ void GSIterate(RCP smootherParams, ArrayView AAcols; ArrayView AAvals; regionMats->getLocalRowView(k, AAcols, AAvals); - const int *Acols = AAcols.getRawPtr(); - const SC *Avals = AAvals.getRawPtr(); + const int* Acols = AAcols.getRawPtr(); + const SC* Avals = AAvals.getRawPtr(); const LO RowLeng = AAvals.size(); // Loop over entries in row k and perform GS iteration for (LO kk = 0; kk < RowLeng; kk++) { - OneregRes[k] -= Avals[kk]*ldelta[Acols[kk]]; + OneregRes[k] -= Avals[kk] * ldelta[Acols[kk]]; } - ldelta[k] = damping*Onediag[k]*OneregRes[k]; + ldelta[k] = damping * Onediag[k] * OneregRes[k]; OneregX[k] += ldelta[k]; } zeroInitGuess = false; - if( sgs ){ + if (sgs) { for (size_t k = numRows; k--;) { // Extract a single row ArrayView AAcols; ArrayView AAvals; regionMats->getLocalRowView(k, AAcols, AAvals); - const int *Acols = AAcols.getRawPtr(); - const SC *Avals = AAvals.getRawPtr(); + const int* Acols = AAcols.getRawPtr(); + const SC* Avals = AAvals.getRawPtr(); const LO RowLeng = AAvals.size(); // Loop over entries in row k and perform GS iteration for (LO kk = 0; kk < RowLeng; kk++) { - OneregRes[k] -= Avals[kk]*ldelta[Acols[kk]]; + OneregRes[k] -= Avals[kk] * ldelta[Acols[kk]]; } - ldelta[k] = damping*Onediag[k]*OneregRes[k]; + ldelta[k] = damping * Onediag[k] * OneregRes[k]; OneregX[k] += ldelta[k]; } } } return; -} // GS - +} // GS //! 
Transfer region vector to composite format and compute its 2-norm -template +template typename Teuchos::ScalarTraits::magnitudeType calcNorm2(RCP >& regVec, - const RCP > rowImport) -{ + const RCP > rowImport) { #include "Xpetra_UseShortNames.hpp" const RCP mapComp = rowImport->getSourceMap(); - RCP compVec = VectorFactory::Build(mapComp, true); + RCP compVec = VectorFactory::Build(mapComp, true); regionalToComposite(regVec, compVec, rowImport); typename Teuchos::ScalarTraits::magnitudeType norm = compVec->norm2(); return norm; -} // calcNorm2 - +} // calcNorm2 /*! Compute inner product of two region vectors * @@ -285,38 +270,35 @@ calcNorm2(RCP >& regVe * * @return Inner product of regX and regY */ -template +template Scalar dotProd(RCP >& regX, RCP >& regY, - const RCP > rowImport) -{ + const RCP > rowImport) { #include "Xpetra_UseShortNames.hpp" const RCP mapComp = rowImport->getSourceMap(); - RCP compX = VectorFactory::Build(mapComp, true); - RCP compY = VectorFactory::Build(mapComp, true); + RCP compX = VectorFactory::Build(mapComp, true); + RCP compY = VectorFactory::Build(mapComp, true); regionalToComposite(regX, compX, rowImport); regionalToComposite(regY, compY, rowImport); SC dotVal = compX->dot(*compY); return dotVal; -} // dotProd - +} // dotProd -template +template Scalar powerMethod(RCP params, const RCP > regionMats, const RCP > revisedRowMap, const RCP > rowImport, - const int numIters) -{ + const int numIters) { #include "Xpetra_UseShortNames.hpp" RCP diag_inv = params->get >("smoothers: inverse diagonal"); - const SC SC_ZERO = Teuchos::ScalarTraits::zero(); - const SC SC_ONE = Teuchos::ScalarTraits::one(); - SC lambdaMax = SC_ZERO; + const SC SC_ZERO = Teuchos::ScalarTraits::zero(); + const SC SC_ONE = Teuchos::ScalarTraits::one(); + SC lambdaMax = SC_ZERO; SC RQ_top, RQ_bottom, norm; RCP regX = VectorFactory::Build(revisedRowMap, true); @@ -325,100 +307,96 @@ powerMethod(RCP params, regX->randomize(); norm = calcNorm2(regX, rowImport); - 
regX->scale( SC_ONE / norm ); + regX->scale(SC_ONE / norm); for (int iter = 0; iter < numIters; ++iter) { - - regionMats->apply(*regX, *regY); // A.apply (x, y); - sumInterfaceValues(regY, revisedRowMap, rowImport); // step 2 + regionMats->apply(*regX, *regY); // A.apply (x, y); + sumInterfaceValues(regY, revisedRowMap, rowImport); // step 2 // Scale by inverse of diagonal regY->elementWiseMultiply(SC_ONE, *diag_inv, *regY, SC_ZERO); - RQ_top = dotProd(regY, regX, rowImport); + RQ_top = dotProd(regY, regX, rowImport); RQ_bottom = dotProd(regX, regX, rowImport); lambdaMax = RQ_top / RQ_bottom; norm = calcNorm2(regY, rowImport); - if (norm == SC_ZERO) { // Return something reasonable. + if (norm == SC_ZERO) { // Return something reasonable. return SC_ZERO; } - regX->update( SC_ONE / norm, *regY, SC_ZERO); - + regX->update(SC_ONE / norm, *regY, SC_ZERO); } return lambdaMax; -} // powerMethod +} // powerMethod /*! \brief Performs Chebyshev specific setup * * Use power method to estimate lambdaMx */ -template +template void chebyshevSetup(RCP params, - const RCP > regionMats, - const RCP > regionInterfaceScaling, - const RCP > revisedRowMap, - const RCP > rowImport) { + const RCP > regionMats, + const RCP > regionInterfaceScaling, + const RCP > revisedRowMap, + const RCP > rowImport) { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Region Chebyshev Setup"))); // Calculate lambdaMax Scalar lambdaMax = 1; - lambdaMax = powerMethod(params, - regionMats, - revisedRowMap, - rowImport, - 10); - params->set< Scalar >("chebyshev: lambda max", lambdaMax ); - -} // chebyshevSetup + lambdaMax = powerMethod(params, + regionMats, + revisedRowMap, + rowImport, + 10); + params->set("chebyshev: lambda max", lambdaMax); +} // chebyshevSetup /*! 
\brief The textbook Chebyshev algorithm from Ifpack2 translated into the region format */ -template +template void chebyshevIterate(RCP smootherParams, - RCP >& regX, ///< left-hand side (or solution) - const RCP > regB, ///< right-hand side (or residual) - const RCP > regionMats, ///< matrices in true region layout - const RCP > revisedRowMap, ///< revised row maps in region layout [in] (actually extracted from regionMats) - const RCP > rowImport,///< row importer in region layout [in] - bool& zeroInitGuess ///< Use a zero vector as initial guess? - ) -{ + RCP >& regX, ///< left-hand side (or solution) + const RCP > regB, ///< right-hand side (or residual) + const RCP > regionMats, ///< matrices in true region layout + const RCP > revisedRowMap, ///< revised row maps in region layout [in] (actually extracted from regionMats) + const RCP > rowImport, ///< row importer in region layout [in] + bool& zeroInitGuess ///< Use a zero vector as initial guess? +) { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Region Chebyshev Iterate"))); // Extract input data from parameter list - const int maxIter = smootherParams->get("smoother: sweeps"); - const Scalar eigRatio = smootherParams->get("smoother: Chebyshev eigRatio"); - const Scalar lambdaMax = smootherParams->get("chebyshev: lambda max"); + const int maxIter = smootherParams->get("smoother: sweeps"); + const Scalar eigRatio = smootherParams->get("smoother: Chebyshev eigRatio"); + const Scalar lambdaMax = smootherParams->get("chebyshev: lambda max"); const Scalar boostFactor = smootherParams->get("smoother: Chebyshev boost factor"); - RCP diag_inv = smootherParams->get >("smoothers: inverse diagonal"); + RCP diag_inv = smootherParams->get >("smoothers: inverse diagonal"); // Define some constants for convenience const Scalar SC_ZERO = Teuchos::ScalarTraits::zero(); - const Scalar SC_ONE = Teuchos::ScalarTraits::one(); - const Scalar SC_TWO = 
Teuchos::as (2); + const Scalar SC_ONE = Teuchos::ScalarTraits::one(); + const Scalar SC_TWO = Teuchos::as(2); // Estimation of eigenvalue interval of interest: [alpha, beta] - const Scalar alpha = lambdaMax / eigRatio; // lower bound (estimate via given max-to-min ratio) - const Scalar beta = boostFactor * lambdaMax; // upper bound (estimated via boost factor) + const Scalar alpha = lambdaMax / eigRatio; // lower bound (estimate via given max-to-min ratio) + const Scalar beta = boostFactor * lambdaMax; // upper bound (estimated via boost factor) // Algorithmic constants const Scalar delta = SC_TWO / (beta - alpha); const Scalar theta = (beta + alpha) / SC_TWO; - const Scalar s1 = theta * delta; + const Scalar s1 = theta * delta; // Algorithmic parameters Scalar dtemp1 = SC_ZERO; Scalar dtemp2 = SC_ZERO; Scalar rhokp1 = SC_ZERO; - Scalar rhok = SC_ONE / s1; + Scalar rhok = SC_ONE / s1; RCP regRes = VectorFactory::Build(revisedRowMap, true); @@ -427,17 +405,16 @@ void chebyshevIterate(RCP smootherParams, // First Iteration if (zeroInitGuess) { - regZ->elementWiseMultiply(SC_ONE, *diag_inv, *regB, SC_ZERO); // Z = D_inv * b - regP->update(SC_ONE/theta, *regZ, SC_ZERO); // P = 1/theta Z - regX->update(SC_ONE, *regP, SC_ZERO); // X = 0 + P - } - else { + regZ->elementWiseMultiply(SC_ONE, *diag_inv, *regB, SC_ZERO); // Z = D_inv * b + regP->update(SC_ONE / theta, *regZ, SC_ZERO); // P = 1/theta Z + regX->update(SC_ONE, *regP, SC_ZERO); // X = 0 + P + } else { // Compute residual vector computeResidual(regRes, regX, regB, regionMats, *smootherParams); - regZ->elementWiseMultiply(SC_ONE, *diag_inv, *regRes, SC_ZERO); // z = D_inv * R, that is, D \ R. - regP->update(SC_ONE/theta, *regZ, SC_ZERO); // P = 1/theta Z - regX->update(SC_ONE, *regP, SC_ONE); // X = X + P + regZ->elementWiseMultiply(SC_ONE, *diag_inv, *regRes, SC_ZERO); // z = D_inv * R, that is, D \ R. 
+ regP->update(SC_ONE / theta, *regZ, SC_ZERO); // P = 1/theta Z + regX->update(SC_ONE, *regP, SC_ONE); // X = X + P } // The rest of the iterations @@ -451,9 +428,9 @@ void chebyshevIterate(RCP smootherParams, rhokp1 = SC_ONE / (SC_TWO * s1 - rhok); dtemp1 = rhokp1 * rhok; dtemp2 = SC_TWO * rhokp1 * delta; - rhok = rhokp1; - regP->update(dtemp2, *regZ, dtemp1);// P = dtemp2*Z + dtemp1*P - regX->update(SC_ONE, *regP, SC_ONE);// X = X + P + rhok = rhokp1; + regP->update(dtemp2, *regZ, dtemp1); // P = dtemp2*Z + dtemp1*P + regX->update(SC_ONE, *regP, SC_ONE); // X = X + P // If we compute the residual here, we could either do R = B - // A*X, or R = R - alpha*A*P. Since we choose the former, we @@ -461,15 +438,14 @@ void chebyshevIterate(RCP smootherParams, } zeroInitGuess = false; -} // chebyshevIterate +} // chebyshevIterate - -template +template void smootherSetup(RCP params, const RCP > revisedRowMap, const RCP > regionMats, const RCP > regionInterfaceScaling, - const RCP > rowImport) ///< row importer in region layout [in] + const RCP > rowImport) ///< row importer in region layout [in] { using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Region Smoother: 1 - Setup"))); @@ -478,42 +454,39 @@ void smootherSetup(RCP params, std::map smootherTypes = getListOfValidSmootherTypes(); - switch(smootherTypes[type]) { - case 0: // None - { - break; - } - case 1: // Jacobi - case 2: // Gauss-Seidel - case 3: // Symmetric Gauss-Seidel - { - computeInverseDiagonal(params, revisedRowMap, regionMats, rowImport); - break; - } - case 4: // Chebyshev - { - computeInverseDiagonal(params, revisedRowMap, regionMats, rowImport); - chebyshevSetup(params, regionMats, regionInterfaceScaling, revisedRowMap, rowImport); - break; - } - default: - { - std::cout << "Unknown smoother: " << type << "!" 
<< std::endl; - throw; - } + switch (smootherTypes[type]) { + case 0: // None + { + break; + } + case 1: // Jacobi + case 2: // Gauss-Seidel + case 3: // Symmetric Gauss-Seidel + { + computeInverseDiagonal(params, revisedRowMap, regionMats, rowImport); + break; + } + case 4: // Chebyshev + { + computeInverseDiagonal(params, revisedRowMap, regionMats, rowImport); + chebyshevSetup(params, regionMats, regionInterfaceScaling, revisedRowMap, rowImport); + break; + } + default: { + std::cout << "Unknown smoother: " << type << "!" << std::endl; + throw; + } } } - -template +template void smootherApply(RCP params, RCP >& regX, const RCP > regB, const RCP > regionMats, const RCP > revisedRowMap, const RCP > rowImport, - bool& zeroInitGuess) -{ + bool& zeroInitGuess) { using Teuchos::TimeMonitor; RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Region Smoother: 2 - Apply"))); @@ -521,38 +494,37 @@ void smootherApply(RCP params, std::map smootherTypes = getListOfValidSmootherTypes(); - switch(smootherTypes[type]) { - case 0: // None - { - break; - } - case 1: // Jacobi - { - jacobiIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess); - break; - } - case 2: // Gauss-Seidel - { - GSIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess); - break; - } - case 3: // Symmetric Gauss-Seidel - { - GSIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess, true); - break; - } - case 4: // Chebyshev - chebyshevIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess); - { - break; - } - default: - { - std::cout << "Unknown smoother: " << type << "!" 
<< std::endl; - throw; - } + switch (smootherTypes[type]) { + case 0: // None + { + break; + } + case 1: // Jacobi + { + jacobiIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess); + break; + } + case 2: // Gauss-Seidel + { + GSIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess); + break; + } + case 3: // Symmetric Gauss-Seidel + { + GSIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess, true); + break; + } + case 4: // Chebyshev + chebyshevIterate(params, regX, regB, regionMats, revisedRowMap, rowImport, zeroInitGuess); + { + break; + } + default: { + std::cout << "Unknown smoother: " << type << "!" << std::endl; + throw; + } } -} // smootherApply +} // smootherApply -#endif // MUELU_SETUPREGIONSMOOTHERS_DEF_HPP +#endif // MUELU_SETUPREGIONSMOOTHERS_DEF_HPP diff --git a/packages/muelu/research/regionMG/src/SetupRegionUtilities.hpp b/packages/muelu/research/regionMG/src/SetupRegionUtilities.hpp index 5c650dfd7e8c..f76d1ed140f8 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionUtilities.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionUtilities.hpp @@ -47,12 +47,11 @@ #include "Teuchos_Array.hpp" -template +template void findInterface(const int numDimensions, Teuchos::Array nodesPerDim, const Teuchos::Array boundaryConditions, Teuchos::Array& interfacesDimensions, Teuchos::Array& interfacesLIDs) { - using LO = LocalOrdinal; std::cout << "nodesPerDim: " << nodesPerDim << std::endl; @@ -64,162 +63,153 @@ void findInterface(const int numDimensions, Teuchos::Array nodesPe // Step 1: determine what edges/faces are needed based on BC. LO numInterfaces = 0, numTotalLIDs = 0, numInterfaceLIDs; interfacesDimensions.resize(18, 1); - for(LO dim = 0; dim < 3; ++dim) { + for (LO dim = 0; dim < 3; ++dim) { // Check for nodes and no boundary conditions in this direction, otherwise skip it. 
- if((nodesPerDim[dim] == 1) || (boundaryConditions[2*dim] + boundaryConditions[2*dim + 1] == 2)) { + if ((nodesPerDim[dim] == 1) || (boundaryConditions[2 * dim] + boundaryConditions[2 * dim + 1] == 2)) { continue; } // Since we are not skipping this direction // we at least need to store data for one surface numInterfaceLIDs = 1; - for(LO dimIdx = 0; dimIdx < 3; ++dimIdx) { - interfacesDimensions[numInterfaces*3 + dimIdx] = (dimIdx == dim ? 1 : nodesPerDim[dimIdx]); + for (LO dimIdx = 0; dimIdx < 3; ++dimIdx) { + interfacesDimensions[numInterfaces * 3 + dimIdx] = (dimIdx == dim ? 1 : nodesPerDim[dimIdx]); numInterfaceLIDs *= (dimIdx == dim ? 1 : nodesPerDim[dimIdx]); } numTotalLIDs += numInterfaceLIDs; ++numInterfaces; // If there are no BC we need to store the surface twice. - if(boundaryConditions[2*dim] + boundaryConditions[2*dim + 1] == 0) { - for(LO dimIdx = 0; dimIdx < 3; ++dimIdx) { - interfacesDimensions[numInterfaces*3 + dimIdx] - = interfacesDimensions[(numInterfaces - 1)*3 + dimIdx]; + if (boundaryConditions[2 * dim] + boundaryConditions[2 * dim + 1] == 0) { + for (LO dimIdx = 0; dimIdx < 3; ++dimIdx) { + interfacesDimensions[numInterfaces * 3 + dimIdx] = interfacesDimensions[(numInterfaces - 1) * 3 + dimIdx]; } numTotalLIDs += numInterfaceLIDs; ++numInterfaces; } } - interfacesDimensions.resize(3*numInterfaces); + interfacesDimensions.resize(3 * numInterfaces); interfacesLIDs.resize(numTotalLIDs, -1); // Step 2 lazy implementation of all geometrical cases. 
LO nodeOffset = 0; - if(numDimensions == 2) { + if (numDimensions == 2) { // left interface - if(boundaryConditions[0] == 0) { - for(LO nodeIdx = 0; nodeIdx < nodesPerDim[1]; ++nodeIdx) { - interfacesLIDs[nodeOffset + nodeIdx] = nodeIdx*nodesPerDim[0]; + if (boundaryConditions[0] == 0) { + for (LO nodeIdx = 0; nodeIdx < nodesPerDim[1]; ++nodeIdx) { + interfacesLIDs[nodeOffset + nodeIdx] = nodeIdx * nodesPerDim[0]; } nodeOffset += nodesPerDim[1]; } // right interface - if(boundaryConditions[1] == 0) { - for(LO nodeIdx = 0; nodeIdx < nodesPerDim[1]; ++nodeIdx) { - interfacesLIDs[nodeOffset + nodeIdx] = (nodeIdx + 1)*nodesPerDim[0] - 1; + if (boundaryConditions[1] == 0) { + for (LO nodeIdx = 0; nodeIdx < nodesPerDim[1]; ++nodeIdx) { + interfacesLIDs[nodeOffset + nodeIdx] = (nodeIdx + 1) * nodesPerDim[0] - 1; } nodeOffset += nodesPerDim[1]; } // front interface - if(boundaryConditions[2] == 0) { - for(LO nodeIdx = 0; nodeIdx < nodesPerDim[0]; ++nodeIdx) { + if (boundaryConditions[2] == 0) { + for (LO nodeIdx = 0; nodeIdx < nodesPerDim[0]; ++nodeIdx) { interfacesLIDs[nodeOffset + nodeIdx] = nodeIdx; } nodeOffset += nodesPerDim[0]; } // back interface - if(boundaryConditions[3] == 0) { - for(LO nodeIdx = 0; nodeIdx < nodesPerDim[0]; ++nodeIdx) { - interfacesLIDs[nodeOffset + nodeIdx] = (nodesPerDim[1] - 1)*nodesPerDim[0] + nodeIdx; + if (boundaryConditions[3] == 0) { + for (LO nodeIdx = 0; nodeIdx < nodesPerDim[0]; ++nodeIdx) { + interfacesLIDs[nodeOffset + nodeIdx] = (nodesPerDim[1] - 1) * nodesPerDim[0] + nodeIdx; } nodeOffset += nodesPerDim[0]; } } - if(numDimensions == 3) { + if (numDimensions == 3) { // left interface - if(boundaryConditions[0] == 0) { - for(LO k = 0; k < nodesPerDim[2]; ++k) { - for(LO j = 0; j < nodesPerDim[1]; ++j) { - interfacesLIDs[nodeOffset + k*nodesPerDim[1] + j] - = k*nodesPerDim[1]*nodesPerDim[0] + j*nodesPerDim[0]; + if (boundaryConditions[0] == 0) { + for (LO k = 0; k < nodesPerDim[2]; ++k) { + for (LO j = 0; j < nodesPerDim[1]; ++j) { + 
interfacesLIDs[nodeOffset + k * nodesPerDim[1] + j] = k * nodesPerDim[1] * nodesPerDim[0] + j * nodesPerDim[0]; } } - nodeOffset += nodesPerDim[2]*nodesPerDim[1]; + nodeOffset += nodesPerDim[2] * nodesPerDim[1]; } // right interface - if(boundaryConditions[1] == 0) { - for(LO k = 0; k < nodesPerDim[2]; ++k) { - for(LO j = 0; j < nodesPerDim[1]; ++j) { - interfacesLIDs[nodeOffset + k*nodesPerDim[1] + j] - = k*nodesPerDim[1]*nodesPerDim[0] + (j + 1)*nodesPerDim[0] - 1; + if (boundaryConditions[1] == 0) { + for (LO k = 0; k < nodesPerDim[2]; ++k) { + for (LO j = 0; j < nodesPerDim[1]; ++j) { + interfacesLIDs[nodeOffset + k * nodesPerDim[1] + j] = k * nodesPerDim[1] * nodesPerDim[0] + (j + 1) * nodesPerDim[0] - 1; } } - nodeOffset += nodesPerDim[2]*nodesPerDim[1]; + nodeOffset += nodesPerDim[2] * nodesPerDim[1]; } // front interface - if(boundaryConditions[2] == 0) { - for(LO k = 0; k < nodesPerDim[2]; ++k) { - for(LO i = 0; i < nodesPerDim[0]; ++i) { - interfacesLIDs[nodeOffset + k*nodesPerDim[0] + i] - = k*nodesPerDim[1]*nodesPerDim[0] + i; + if (boundaryConditions[2] == 0) { + for (LO k = 0; k < nodesPerDim[2]; ++k) { + for (LO i = 0; i < nodesPerDim[0]; ++i) { + interfacesLIDs[nodeOffset + k * nodesPerDim[0] + i] = k * nodesPerDim[1] * nodesPerDim[0] + i; } } - nodeOffset += nodesPerDim[2]*nodesPerDim[0]; + nodeOffset += nodesPerDim[2] * nodesPerDim[0]; } // back interface - if(boundaryConditions[3] == 0) { - for(LO k = 0; k < nodesPerDim[2]; ++k) { - for(LO i = 0; i < nodesPerDim[0]; ++i) { - interfacesLIDs[nodeOffset + k*nodesPerDim[0] + i] - = k*nodesPerDim[1]*nodesPerDim[0] + (nodesPerDim[1] - 1)*nodesPerDim[0] + i; + if (boundaryConditions[3] == 0) { + for (LO k = 0; k < nodesPerDim[2]; ++k) { + for (LO i = 0; i < nodesPerDim[0]; ++i) { + interfacesLIDs[nodeOffset + k * nodesPerDim[0] + i] = k * nodesPerDim[1] * nodesPerDim[0] + (nodesPerDim[1] - 1) * nodesPerDim[0] + i; } } - nodeOffset += nodesPerDim[2]*nodesPerDim[0]; + nodeOffset += nodesPerDim[2] * 
nodesPerDim[0]; } // bottom interface - if(boundaryConditions[4] == 0) { - for(LO j = 0; j < nodesPerDim[1]; ++j) { - for(LO i = 0; i < nodesPerDim[0]; ++i) { - interfacesLIDs[nodeOffset + j*nodesPerDim[0] + i] - = j*nodesPerDim[0] + i; + if (boundaryConditions[4] == 0) { + for (LO j = 0; j < nodesPerDim[1]; ++j) { + for (LO i = 0; i < nodesPerDim[0]; ++i) { + interfacesLIDs[nodeOffset + j * nodesPerDim[0] + i] = j * nodesPerDim[0] + i; } } - nodeOffset += nodesPerDim[1]*nodesPerDim[0]; + nodeOffset += nodesPerDim[1] * nodesPerDim[0]; } - //top interface - if(boundaryConditions[5] == 0) { - for(LO j = 0; j < nodesPerDim[1]; ++j) { - for(LO i = 0; i < nodesPerDim[0]; ++i) { - interfacesLIDs[nodeOffset + j*nodesPerDim[0] + i] - = (nodesPerDim[2] - 1)*nodesPerDim[1]*nodesPerDim[0] + j*nodesPerDim[0] + i; + // top interface + if (boundaryConditions[5] == 0) { + for (LO j = 0; j < nodesPerDim[1]; ++j) { + for (LO i = 0; i < nodesPerDim[0]; ++i) { + interfacesLIDs[nodeOffset + j * nodesPerDim[0] + i] = (nodesPerDim[2] - 1) * nodesPerDim[1] * nodesPerDim[0] + j * nodesPerDim[0] + i; } } - nodeOffset += nodesPerDim[1]*nodesPerDim[0]; + nodeOffset += nodesPerDim[1] * nodesPerDim[0]; } } -} // findInterface - +} // findInterface -template +template void createRegionData(const int numDimensions, const bool useUnstructured, const int numDofsPerNode, const Teuchos::ArrayView gNodesPerDim, - const Teuchos::ArrayView lNodesPerDim, + const Teuchos::ArrayView lNodesPerDim, const Teuchos::ArrayView procsPerDim, const Teuchos::RCP > nodeMap, const Teuchos::RCP > dofMap, int& maxRegPerGID, LocalOrdinal& numLocalRegionNodes, Teuchos::Array& boundaryConditions, - Teuchos::Array& sendGIDs, ///< GIDs of nodes + Teuchos::Array& sendGIDs, ///< GIDs of nodes Teuchos::Array& sendPIDs, int& numInterfaces, - Teuchos::Array& rNodesPerDim, + Teuchos::Array& rNodesPerDim, Teuchos::Array& quasiRegionGIDs, Teuchos::Array& quasiRegionCoordGIDs, - Teuchos::Array& compositeToRegionLIDs, + 
Teuchos::Array& compositeToRegionLIDs, Teuchos::Array& interfaceGIDs, - Teuchos::Array& interfaceLIDsData) { - + Teuchos::Array& interfaceLIDsData) { using GO = GlobalOrdinal; using LO = LocalOrdinal; @@ -228,40 +218,52 @@ void createRegionData(const int numDimensions, Teuchos::Array endIndices(3); const GO startGID = dofMap->getMinGlobalIndex() / numDofsPerNode; { - startIndices[2] = startGID / (gNodesPerDim[1]*gNodesPerDim[0]); - const GO rem = startGID % (gNodesPerDim[1]*gNodesPerDim[0]); + startIndices[2] = startGID / (gNodesPerDim[1] * gNodesPerDim[0]); + const GO rem = startGID % (gNodesPerDim[1] * gNodesPerDim[0]); startIndices[1] = rem / gNodesPerDim[0]; startIndices[0] = rem % gNodesPerDim[0]; - endIndices[0] = startIndices[0] + lNodesPerDim[0] - 1; - endIndices[1] = startIndices[1] + lNodesPerDim[1] - 1; - endIndices[2] = startIndices[2] + lNodesPerDim[2] - 1; + endIndices[0] = startIndices[0] + lNodesPerDim[0] - 1; + endIndices[1] = startIndices[1] + lNodesPerDim[1] - 1; + endIndices[2] = startIndices[2] + lNodesPerDim[2] - 1; } int leftBC = 0, rightBC = 0, frontBC = 0, backBC = 0, bottomBC = 0, topBC = 0; - if(startIndices[0] == 0) {leftBC = 1;} - if(startIndices[1] == 0) {frontBC = 1;} - if(startIndices[2] == 0) {bottomBC = 1;} + if (startIndices[0] == 0) { + leftBC = 1; + } + if (startIndices[1] == 0) { + frontBC = 1; + } + if (startIndices[2] == 0) { + bottomBC = 1; + } - if(endIndices[0] == gNodesPerDim[0] - 1) {rightBC = 1;} - if(endIndices[1] == gNodesPerDim[1] - 1) {backBC = 1;} - if(endIndices[2] == gNodesPerDim[2] - 1) {topBC = 1;} + if (endIndices[0] == gNodesPerDim[0] - 1) { + rightBC = 1; + } + if (endIndices[1] == gNodesPerDim[1] - 1) { + backBC = 1; + } + if (endIndices[2] == gNodesPerDim[2] - 1) { + topBC = 1; + } boundaryConditions.resize(6); - boundaryConditions[0] = leftBC ; - boundaryConditions[1] = rightBC ; - boundaryConditions[2] = frontBC ; - boundaryConditions[3] = backBC ; + boundaryConditions[0] = leftBC; + 
boundaryConditions[1] = rightBC; + boundaryConditions[2] = frontBC; + boundaryConditions[3] = backBC; boundaryConditions[4] = bottomBC; - boundaryConditions[5] = topBC ; + boundaryConditions[5] = topBC; LO numReceive = 0, numSend = 0; - Teuchos::Array receiveGIDs; + Teuchos::Array receiveGIDs; Teuchos::Array receivePIDs; - Teuchos::Array receiveLIDs, sendLIDs, interfaceLIDs; + Teuchos::Array receiveLIDs, sendLIDs, interfaceLIDs; - if(numDimensions == 1) { + if (numDimensions == 1) { maxRegPerGID = 2; - if(leftBC == 0) { + if (leftBC == 0) { numReceive = 1; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); @@ -270,7 +272,7 @@ void createRegionData(const int numDimensions, receiveGIDs[0] = startIndices[0] - 1; receivePIDs[0] = myRank - 1; } - if(rightBC == 0) { + if (rightBC == 0) { numSend = 1; sendGIDs.resize(numSend); sendPIDs.resize(numSend); @@ -280,10 +282,10 @@ void createRegionData(const int numDimensions, sendGIDs[0] = myRank + 1; sendLIDs[0] = lNodesPerDim[0] - 1; } - } else if(numDimensions == 2) { + } else if (numDimensions == 2) { maxRegPerGID = 4; // Received nodes - if(frontBC == 0 && leftBC == 0) { + if (frontBC == 0 && leftBC == 0) { numReceive = lNodesPerDim[0] + lNodesPerDim[1] + 1; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); @@ -295,31 +297,31 @@ void createRegionData(const int numDimensions, receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; ++countIDs; // Receive front edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } // Receive left edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - 1 + j*gNodesPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - 1 + j * gNodesPerDim[0]; receivePIDs[countIDs] = myRank - 1; ++countIDs; } - if(useUnstructured) { // Set parameters 
for interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 2; interfaceLIDs.resize((lNodesPerDim[0] + 1) + (lNodesPerDim[1] + 1)); - for(LO nodeIdx = 0; nodeIdx < lNodesPerDim[0] + 1; ++nodeIdx) { + for (LO nodeIdx = 0; nodeIdx < lNodesPerDim[0] + 1; ++nodeIdx) { interfaceLIDs[nodeIdx] = nodeIdx; } - for(LO nodeIdx = 0; nodeIdx < lNodesPerDim[1] + 1; ++nodeIdx) { - interfaceLIDs[lNodesPerDim[0] + 1 + nodeIdx] = nodeIdx*(lNodesPerDim[1] + 1); + for (LO nodeIdx = 0; nodeIdx < lNodesPerDim[1] + 1; ++nodeIdx) { + interfaceLIDs[lNodesPerDim[0] + 1 + nodeIdx] = nodeIdx * (lNodesPerDim[1] + 1); } } - } else if(frontBC == 0) { + } else if (frontBC == 0) { numReceive = lNodesPerDim[0]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); @@ -327,22 +329,22 @@ void createRegionData(const int numDimensions, LO countIDs = 0; // Receive front edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } - if(useUnstructured) { // Set parameters for interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 1; interfaceLIDs.resize(lNodesPerDim[0] + 1); - for(LO nodeIdx = 0; nodeIdx < lNodesPerDim[0] + 1; ++nodeIdx) { + for (LO nodeIdx = 0; nodeIdx < lNodesPerDim[0] + 1; ++nodeIdx) { interfaceLIDs[nodeIdx] = nodeIdx; } } - } else if(leftBC == 0) { + } else if (leftBC == 0) { numReceive = lNodesPerDim[1]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); @@ -350,25 +352,24 @@ void createRegionData(const int numDimensions, LO countIDs = 0; // Receive left edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - 1 + j*gNodesPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - 1 + j * gNodesPerDim[0]; receivePIDs[countIDs] = myRank - 
1; ++countIDs; } - if(useUnstructured) { // Set parameters for interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 1; interfaceLIDs.resize(lNodesPerDim[1] + 1); - for(LO nodeIdx = 0; nodeIdx < lNodesPerDim[1] + 1; ++nodeIdx) { - interfaceLIDs[nodeIdx] = nodeIdx*(lNodesPerDim[1] + 1); + for (LO nodeIdx = 0; nodeIdx < lNodesPerDim[1] + 1; ++nodeIdx) { + interfaceLIDs[nodeIdx] = nodeIdx * (lNodesPerDim[1] + 1); } } - } // Sent nodes - if(rightBC == 0 && backBC == 0) { + if (rightBC == 0 && backBC == 0) { numSend = lNodesPerDim[0] + lNodesPerDim[1] + 1; sendGIDs.resize(numSend); sendPIDs.resize(numSend); @@ -376,25 +377,25 @@ void createRegionData(const int numDimensions, LO countIDs = 0; // Send nodes of right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; - sendLIDs[countIDs] = (j + 1)*lNodesPerDim[0] - 1; + sendLIDs[countIDs] = (j + 1) * lNodesPerDim[0] - 1; ++countIDs; } // Send nodes of back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; + sendLIDs[countIDs] = (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; ++countIDs; } // Send node of back-right corner - sendGIDs[countIDs] = startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendGIDs[countIDs] = startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; - sendLIDs[countIDs] = lNodesPerDim[1]*lNodesPerDim[0] - 1; + sendLIDs[countIDs] = 
lNodesPerDim[1] * lNodesPerDim[0] - 1; ++countIDs; - } else if(backBC == 0) { + } else if (backBC == 0) { numSend = lNodesPerDim[0]; sendGIDs.resize(numSend); @@ -403,13 +404,13 @@ void createRegionData(const int numDimensions, LO countIDs = 0; // Send nodes of back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; sendPIDs[countIDs] = myRank + procsPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; + sendLIDs[countIDs] = (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; ++countIDs; } - } else if(rightBC == 0) { + } else if (rightBC == 0) { numSend = lNodesPerDim[1]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); @@ -417,642 +418,567 @@ void createRegionData(const int numDimensions, LO countIDs = 0; // Send nodes of right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; - sendLIDs[countIDs] = (j + 1)*lNodesPerDim[0] - 1; + sendLIDs[countIDs] = (j + 1) * lNodesPerDim[0] - 1; ++countIDs; } } - } else if(numDimensions == 3) { + } else if (numDimensions == 3) { maxRegPerGID = 8; // Received nodes - if( (bottomBC == 0) && (frontBC == 0) && (leftBC == 0) ) { - numReceive = lNodesPerDim[0]*lNodesPerDim[1] // bottom face - + lNodesPerDim[0]*(lNodesPerDim[2] + 1) // front face - + (lNodesPerDim[1] + 1)*(lNodesPerDim[2] + 1); // left face + if ((bottomBC == 0) && (frontBC == 0) && (leftBC == 0)) { + numReceive = lNodesPerDim[0] * lNodesPerDim[1] // bottom face + + lNodesPerDim[0] * (lNodesPerDim[2] + 1) // front face + + (lNodesPerDim[1] + 1) * (lNodesPerDim[2] + 1); // left face receiveGIDs.resize(numReceive); 
receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Receive front-left-bottom corner node - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - 1 - - gNodesPerDim[1]*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0] - 1 - - procsPerDim[1]*procsPerDim[0]; + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - 1 - gNodesPerDim[1] * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] - 1 - procsPerDim[1] * procsPerDim[0]; ++countIDs; // Receive front-bottom edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - - gNodesPerDim[0] + i; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1] - procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] - gNodesPerDim[0] + i; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1] - procsPerDim[0]; ++countIDs; } // Recieve left-bottom edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - - 1 + j*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] - 1 + j * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1] - 1; ++countIDs; // Recieve bottom face nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - + i - + j*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] + i + j * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1]; ++countIDs; } } // Receive front-left edge nodes - for(LO k = 0; k < lNodesPerDim[2]; 
++k) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - - 1 + k*gNodesPerDim[0]*gNodesPerDim[1]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] - 1 + k * gNodesPerDim[0] * gNodesPerDim[1]; receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; ++countIDs; // Receive front face nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i - + k*(gNodesPerDim[1]*gNodesPerDim[0]); + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i + k * (gNodesPerDim[1] * gNodesPerDim[0]); receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } // Receive left face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = startGID - 1 - + j*gNodesPerDim[0] - + k*(gNodesPerDim[1]*gNodesPerDim[0]); + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = startGID - 1 + j * gNodesPerDim[0] + k * (gNodesPerDim[1] * gNodesPerDim[0]); receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - if(useUnstructured) { // Set parameters for interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 3; - interfaceLIDs.resize((lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1) - + (lNodesPerDim[0] + 1)*(lNodesPerDim[2] + 1) - + (lNodesPerDim[1] + 1)*(lNodesPerDim[2] + 1)); + interfaceLIDs.resize((lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1) + (lNodesPerDim[0] + 1) * (lNodesPerDim[2] + 1) + (lNodesPerDim[1] + 1) * (lNodesPerDim[2] + 1)); LO nodeOffset = 0, nodeIdx, nodeLID; // Bottom face - for(nodeIdx = 0; nodeIdx < (lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1); ++nodeIdx) { + for (nodeIdx = 0; nodeIdx < (lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1); ++nodeIdx) { interfaceLIDs[nodeIdx] = nodeIdx; } // Front face - nodeOffset += (lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1); - for(LO k = 0; k < lNodesPerDim[2] + 1; ++k) { - for(LO i = 0; i < lNodesPerDim[0] + 1; ++i) { - nodeIdx = 
k*(lNodesPerDim[0] + 1) + i + nodeOffset; - nodeLID = k*(lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1) + i; + nodeOffset += (lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1); + for (LO k = 0; k < lNodesPerDim[2] + 1; ++k) { + for (LO i = 0; i < lNodesPerDim[0] + 1; ++i) { + nodeIdx = k * (lNodesPerDim[0] + 1) + i + nodeOffset; + nodeLID = k * (lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1) + i; interfaceLIDs[nodeIdx] = nodeLID; } } // Left face - nodeOffset += (lNodesPerDim[0] + 1)*(lNodesPerDim[2] + 1); - for(LO k = 0; k < lNodesPerDim[2] + 1; ++k) { - for(LO j = 0; j < lNodesPerDim[1] + 1; ++j) { - nodeIdx = k*(lNodesPerDim[1] + 1) + j + nodeOffset; - nodeLID = k*(lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1) + j*(lNodesPerDim[0] + 1); + nodeOffset += (lNodesPerDim[0] + 1) * (lNodesPerDim[2] + 1); + for (LO k = 0; k < lNodesPerDim[2] + 1; ++k) { + for (LO j = 0; j < lNodesPerDim[1] + 1; ++j) { + nodeIdx = k * (lNodesPerDim[1] + 1) + j + nodeOffset; + nodeLID = k * (lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1) + j * (lNodesPerDim[0] + 1); interfaceLIDs[nodeIdx] = nodeLID; } } } - // Two faces received - } else if( (bottomBC == 0) && (frontBC == 0) ) { - numReceive = lNodesPerDim[0]*(lNodesPerDim[1] + lNodesPerDim[2] + 1); + // Two faces received + } else if ((bottomBC == 0) && (frontBC == 0)) { + numReceive = lNodesPerDim[0] * (lNodesPerDim[1] + lNodesPerDim[2] + 1); receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Receive front-bottom edge nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - - gNodesPerDim[0] + i; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1] - procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] - gNodesPerDim[0] + i; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1] - procsPerDim[0]; ++countIDs; } // Receive 
bottom face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0]*gNodesPerDim[1] - + i - + j*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[0]*procsPerDim[1]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] * gNodesPerDim[1] + i + j * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[0] * procsPerDim[1]; ++countIDs; } } // Receive front face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i - + k*(gNodesPerDim[1]*gNodesPerDim[0]); + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = startGID - gNodesPerDim[0] + i + k * (gNodesPerDim[1] * gNodesPerDim[0]); receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } } - if(useUnstructured) { // Set parameters for interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 2; - interfaceLIDs.resize(lNodesPerDim[0]*(lNodesPerDim[1] + 1) - + lNodesPerDim[0]*(lNodesPerDim[2] + 1)); + interfaceLIDs.resize(lNodesPerDim[0] * (lNodesPerDim[1] + 1) + lNodesPerDim[0] * (lNodesPerDim[2] + 1)); LO nodeOffset = 0, nodeIdx, nodeLID; // Bottom face - for(nodeIdx = 0; nodeIdx < lNodesPerDim[0]*(lNodesPerDim[1] + 1); ++nodeIdx) { + for (nodeIdx = 0; nodeIdx < lNodesPerDim[0] * (lNodesPerDim[1] + 1); ++nodeIdx) { interfaceLIDs[nodeIdx] = nodeIdx; } // Front face - nodeOffset += lNodesPerDim[0]*(lNodesPerDim[1] + 1); - for(LO k = 0; k < lNodesPerDim[2] + 1; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - nodeIdx = k*lNodesPerDim[0] + i + nodeOffset; - nodeLID = k*lNodesPerDim[0]*(lNodesPerDim[1] + 1) + i; + nodeOffset += lNodesPerDim[0] * (lNodesPerDim[1] + 1); + for (LO k = 0; k < lNodesPerDim[2] + 1; 
++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + nodeIdx = k * lNodesPerDim[0] + i + nodeOffset; + nodeLID = k * lNodesPerDim[0] * (lNodesPerDim[1] + 1) + i; interfaceLIDs[nodeIdx] = nodeLID; } } } - } else if( (bottomBC == 0) && (leftBC == 0) ) { - numReceive = lNodesPerDim[1]*(lNodesPerDim[0] + lNodesPerDim[2] + 1); + } else if ((bottomBC == 0) && (leftBC == 0)) { + numReceive = lNodesPerDim[1] * (lNodesPerDim[0] + lNodesPerDim[2] + 1); receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Receive left-bottom edge nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = j*gNodesPerDim[0] - + startGID - gNodesPerDim[1]*gNodesPerDim[0] - 1; - receivePIDs[countIDs] = myRank - procsPerDim[1]*procsPerDim[0] - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = j * gNodesPerDim[0] + startGID - gNodesPerDim[1] * gNodesPerDim[0] - 1; + receivePIDs[countIDs] = myRank - procsPerDim[1] * procsPerDim[0] - 1; ++countIDs; // Receive bottom face nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID - gNodesPerDim[1]*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[1]*procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID - gNodesPerDim[1] * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Receive left face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + j*gNodesPerDim[0] - + startGID - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID - 1; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - if(useUnstructured) { // Set parameters for 
interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 2; - interfaceLIDs.resize((lNodesPerDim[0] + 1)*lNodesPerDim[1] - + lNodesPerDim[1]*(lNodesPerDim[2] + 1)); + interfaceLIDs.resize((lNodesPerDim[0] + 1) * lNodesPerDim[1] + lNodesPerDim[1] * (lNodesPerDim[2] + 1)); LO nodeOffset = 0, nodeIdx, nodeLID; // Bottom face - for(nodeIdx = 0; nodeIdx < (lNodesPerDim[0] + 1)*lNodesPerDim[1]; ++nodeIdx) { + for (nodeIdx = 0; nodeIdx < (lNodesPerDim[0] + 1) * lNodesPerDim[1]; ++nodeIdx) { interfaceLIDs[nodeIdx] = nodeIdx; } // Left face - nodeOffset += (lNodesPerDim[0] + 1)*lNodesPerDim[1]; - for(LO k = 0; k < lNodesPerDim[2] + 1; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - nodeIdx = k*lNodesPerDim[1] + j + nodeOffset; - nodeLID = k*(lNodesPerDim[0] + 1)*lNodesPerDim[1] + j*(lNodesPerDim[0] + 1); + nodeOffset += (lNodesPerDim[0] + 1) * lNodesPerDim[1]; + for (LO k = 0; k < lNodesPerDim[2] + 1; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + nodeIdx = k * lNodesPerDim[1] + j + nodeOffset; + nodeLID = k * (lNodesPerDim[0] + 1) * lNodesPerDim[1] + j * (lNodesPerDim[0] + 1); interfaceLIDs[nodeIdx] = nodeLID; } } } - } else if( (frontBC == 0) && (leftBC == 0) ) { - numReceive = lNodesPerDim[2]*(lNodesPerDim[0] + lNodesPerDim[1] + 1); + } else if ((frontBC == 0) && (leftBC == 0)) { + numReceive = lNodesPerDim[2] * (lNodesPerDim[0] + lNodesPerDim[1] + 1); receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Receive front-left edge nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + startGID - gNodesPerDim[0] - 1; - receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + startGID - gNodesPerDim[0] - 1; + receivePIDs[countIDs] = myRank - procsPerDim[0] - 1; ++countIDs; // Receive front 
face nodes - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID - gNodesPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + i + startGID - gNodesPerDim[0]; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } // Receive left face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + j*gNodesPerDim[0] - + startGID - 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID - 1; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - if(useUnstructured) { // Set parameters for interface aggregation + if (useUnstructured) { // Set parameters for interface aggregation numInterfaces = 2; - interfaceLIDs.resize((lNodesPerDim[0] + 1)*lNodesPerDim[2] - + (lNodesPerDim[1] + 1)*lNodesPerDim[2]); + interfaceLIDs.resize((lNodesPerDim[0] + 1) * lNodesPerDim[2] + (lNodesPerDim[1] + 1) * lNodesPerDim[2]); LO nodeOffset = 0, nodeIdx, nodeLID; // Front face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0] + 1; ++i) { - nodeIdx = k*(lNodesPerDim[0] + 1) + i; - nodeLID = k*(lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1) + i; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0] + 1; ++i) { + nodeIdx = k * (lNodesPerDim[0] + 1) + i; + nodeLID = k * (lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1) + i; interfaceLIDs[nodeIdx] = nodeLID; } } // Left face - nodeOffset += (lNodesPerDim[0] + 1)*lNodesPerDim[2]; - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1] + 1; ++j) { - nodeIdx = k*(lNodesPerDim[1] + 1) + j + nodeOffset; - nodeLID = k*(lNodesPerDim[0] + 1)*(lNodesPerDim[1] + 1) + j*(lNodesPerDim[0] + 1); + nodeOffset += (lNodesPerDim[0] + 1) * lNodesPerDim[2]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; 
j < lNodesPerDim[1] + 1; ++j) { + nodeIdx = k * (lNodesPerDim[1] + 1) + j + nodeOffset; + nodeLID = k * (lNodesPerDim[0] + 1) * (lNodesPerDim[1] + 1) + j * (lNodesPerDim[0] + 1); interfaceLIDs[nodeIdx] = nodeLID; } } } - // Single face received - } else if(bottomBC == 0) { - numReceive = lNodesPerDim[0]*lNodesPerDim[1]; + // Single face received + } else if (bottomBC == 0) { + numReceive = lNodesPerDim[0] * lNodesPerDim[1]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Receive bottom face nodes - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID - gNodesPerDim[1]*gNodesPerDim[0]; - receivePIDs[countIDs] = myRank - procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID - gNodesPerDim[1] * gNodesPerDim[0]; + receivePIDs[countIDs] = myRank - procsPerDim[1] * procsPerDim[0]; ++countIDs; } } - } else if(frontBC == 0) { - numReceive = lNodesPerDim[0]*lNodesPerDim[2]; + } else if (frontBC == 0) { + numReceive = lNodesPerDim[0] * lNodesPerDim[2]; receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Receive front face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID - gNodesPerDim[0]; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + i + startGID - gNodesPerDim[0]; receivePIDs[countIDs] = myRank - procsPerDim[0]; ++countIDs; } } - } else if(leftBC == 0) { - numReceive = lNodesPerDim[1]*lNodesPerDim[2]; + } else if (leftBC == 0) { + numReceive = lNodesPerDim[1] * lNodesPerDim[2]; 
receiveGIDs.resize(numReceive); receivePIDs.resize(numReceive); receiveLIDs.resize(numReceive); LO countIDs = 0; // Recive left face nodes - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - receiveGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + j*gNodesPerDim[0] + startGID - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + receiveGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID - 1; receivePIDs[countIDs] = myRank - 1; ++countIDs; } } - } // Sent nodes - if( (topBC == 0) && (backBC == 0) && (rightBC == 0) ) { - numSend = (lNodesPerDim[0])*(lNodesPerDim[1]) - + (lNodesPerDim[0])*(lNodesPerDim[2]) - + (lNodesPerDim[1])*(lNodesPerDim[2]) - + lNodesPerDim[0] - + lNodesPerDim[1] - + lNodesPerDim[2] - + 1; + if ((topBC == 0) && (backBC == 0) && (rightBC == 0)) { + numSend = (lNodesPerDim[0]) * (lNodesPerDim[1]) + (lNodesPerDim[0]) * (lNodesPerDim[2]) + (lNodesPerDim[1]) * (lNodesPerDim[2]) + lNodesPerDim[0] + lNodesPerDim[1] + lNodesPerDim[2] + 1; sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) - + j*gNodesPerDim[0] - + startGID + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = k*(lNodesPerDim[1]*lNodesPerDim[0]) - + j*lNodesPerDim[0] + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = k * (lNodesPerDim[1] * lNodesPerDim[0]) + j * lNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { 
- sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; - sendLIDs[countIDs] = k*(lNodesPerDim[1]*lNodesPerDim[0]) - + (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; + sendLIDs[countIDs] = k * (lNodesPerDim[1] * lNodesPerDim[0]) + (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } // Send nodes of right-back edge - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = k*(lNodesPerDim[1]*lNodesPerDim[0]) - + lNodesPerDim[1]*lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = k * (lNodesPerDim[1] * lNodesPerDim[0]) + lNodesPerDim[1] * lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; ++countIDs; } // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + i; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0]; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + i; + 
sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Send nodes of top-right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*(lNodesPerDim[1]*lNodesPerDim[0]) - + j*lNodesPerDim[0] + lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * (lNodesPerDim[1] * lNodesPerDim[0]) + j * lNodesPerDim[0] + lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + 1; ++countIDs; } // Send nodes of top-back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] - + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*(lNodesPerDim[1]*lNodesPerDim[0]) - + (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * (lNodesPerDim[1] * lNodesPerDim[0]) + (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + procsPerDim[0]; ++countIDs; } // Send node of top-back-right corner - sendGIDs[countIDs] = startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] - + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = lNodesPerDim[2]*lNodesPerDim[1]*lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + 
procsPerDim[1]*procsPerDim[0] + procsPerDim[0] + 1; + sendGIDs[countIDs] = startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + (lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = lNodesPerDim[2] * lNodesPerDim[1] * lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + procsPerDim[0] + 1; ++countIDs; - } else if( (topBC == 0) && (backBC == 0) ) { - numSend = (lNodesPerDim[0]*lNodesPerDim[2]) // back face - + (lNodesPerDim[0]*lNodesPerDim[1]) // Top face - + (lNodesPerDim[0]); // top-back edge + } else if ((topBC == 0) && (backBC == 0)) { + numSend = (lNodesPerDim[0] * lNodesPerDim[2]) // back face + + (lNodesPerDim[0] * lNodesPerDim[1]) // Top face + + (lNodesPerDim[0]); // top-back edge sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; - sendLIDs[countIDs] = k*lNodesPerDim[1]*lNodesPerDim[0] - + (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; + sendLIDs[countIDs] = k * lNodesPerDim[1] * lNodesPerDim[0] + (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + i; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO 
j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0]; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + i; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Send nodes of top-back edge - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] - + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + procsPerDim[0]; + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * lNodesPerDim[1] * lNodesPerDim[0] + (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + procsPerDim[0]; ++countIDs; } - } else if( (topBC == 0) && (rightBC == 0) ) { - numSend = (lNodesPerDim[1]*lNodesPerDim[2]) // right face - + (lNodesPerDim[0]*lNodesPerDim[1]) // Top face - + (lNodesPerDim[1]); // top-right edge + } else if ((topBC == 0) && (rightBC == 0)) { + numSend = (lNodesPerDim[1] * lNodesPerDim[2]) // right face + + (lNodesPerDim[0] * lNodesPerDim[1]) // Top face + + (lNodesPerDim[1]); // top-right edge sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*(gNodesPerDim[1]*gNodesPerDim[0]) - + j*gNodesPerDim[0] - + startGID + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = k*(lNodesPerDim[1]*lNodesPerDim[0]) 
- + j*lNodesPerDim[0] + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * (gNodesPerDim[1] * gNodesPerDim[0]) + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = k * (lNodesPerDim[1] * lNodesPerDim[0]) + j * lNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0]; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + i; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0]; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + i; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } // Send nodes of top-right edge - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = j*gNodesPerDim[0] - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[1]*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0] + 1; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[1] * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0] + 1; ++countIDs; } - } 
else if( (backBC == 0) && (rightBC == 0) ) { - numSend = lNodesPerDim[2]*(lNodesPerDim[0] + lNodesPerDim[1] + 1); + } else if ((backBC == 0) && (rightBC == 0)) { + numSend = lNodesPerDim[2] * (lNodesPerDim[0] + lNodesPerDim[1] + 1); sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + j*gNodesPerDim[0] - + startGID + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = k*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = k * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0]; - sendLIDs[countIDs] = k*lNodesPerDim[1]*lNodesPerDim[0] - + (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; + sendLIDs[countIDs] = k * lNodesPerDim[1] * lNodesPerDim[0] + (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } // Send nodes of back-right edge - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + startGID + (lNodesPerDim[1] - 1)*gNodesPerDim[0] + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = 
k*(lNodesPerDim[1]*lNodesPerDim[0]) - + lNodesPerDim[1]*lNodesPerDim[0] - 1; - sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; - ++countIDs; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0] + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = k * (lNodesPerDim[1] * lNodesPerDim[0]) + lNodesPerDim[1] * lNodesPerDim[0] - 1; + sendPIDs[countIDs] = myRank + procsPerDim[0] + 1; + ++countIDs; } - } else if(topBC == 0) { - numSend = lNodesPerDim[0]*lNodesPerDim[1]; + } else if (topBC == 0) { + numSend = lNodesPerDim[0] * lNodesPerDim[1]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of top face - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = j*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[2] - 1)*gNodesPerDim[0]*gNodesPerDim[1]; - sendLIDs[countIDs] = (lNodesPerDim[2] - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + i; - sendPIDs[countIDs] = myRank + procsPerDim[1]*procsPerDim[0]; + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = j * gNodesPerDim[0] + i + startGID + (lNodesPerDim[2] - 1) * gNodesPerDim[0] * gNodesPerDim[1]; + sendLIDs[countIDs] = (lNodesPerDim[2] - 1) * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + i; + sendPIDs[countIDs] = myRank + procsPerDim[1] * procsPerDim[0]; ++countIDs; } } - } else if(backBC == 0) { - numSend = lNodesPerDim[0]*lNodesPerDim[2]; + } else if (backBC == 0) { + numSend = lNodesPerDim[0] * lNodesPerDim[2]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of back face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO i = 0; i < lNodesPerDim[0]; ++i) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] + i - + startGID + (lNodesPerDim[1] - 
1)*gNodesPerDim[0]; - sendLIDs[countIDs] = k*lNodesPerDim[1]*lNodesPerDim[0] - + (lNodesPerDim[1] - 1)*lNodesPerDim[0] + i; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO i = 0; i < lNodesPerDim[0]; ++i) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + i + startGID + (lNodesPerDim[1] - 1) * gNodesPerDim[0]; + sendLIDs[countIDs] = k * lNodesPerDim[1] * lNodesPerDim[0] + (lNodesPerDim[1] - 1) * lNodesPerDim[0] + i; sendPIDs[countIDs] = myRank + procsPerDim[0]; ++countIDs; } } - } else if(rightBC == 0) { - numSend = lNodesPerDim[1]*lNodesPerDim[2]; + } else if (rightBC == 0) { + numSend = lNodesPerDim[1] * lNodesPerDim[2]; sendGIDs.resize(numSend); sendPIDs.resize(numSend); sendLIDs.resize(numSend); LO countIDs = 0; // Send nodes of right face - for(LO k = 0; k < lNodesPerDim[2]; ++k) { - for(LO j = 0; j < lNodesPerDim[1]; ++j) { - sendGIDs[countIDs] = k*gNodesPerDim[1]*gNodesPerDim[0] - + j*gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; - sendLIDs[countIDs] = k*lNodesPerDim[1]*lNodesPerDim[0] - + j*lNodesPerDim[0] + lNodesPerDim[0] - 1; + for (LO k = 0; k < lNodesPerDim[2]; ++k) { + for (LO j = 0; j < lNodesPerDim[1]; ++j) { + sendGIDs[countIDs] = k * gNodesPerDim[1] * gNodesPerDim[0] + j * gNodesPerDim[0] + startGID + lNodesPerDim[0] - 1; + sendLIDs[countIDs] = k * lNodesPerDim[1] * lNodesPerDim[0] + j * lNodesPerDim[0] + lNodesPerDim[0] - 1; sendPIDs[countIDs] = myRank + 1; ++countIDs; } } - } } - // The code below could probably be separated and performed in its own function. // It pretty much tries to go from the more geometric based information generated // above to the quasiRegion map which means finding the GID of nodes-off rank on the interface. 
- const LO numLocalCompositeNodes = lNodesPerDim[0]*lNodesPerDim[1]*lNodesPerDim[2]; - numLocalRegionNodes = numLocalCompositeNodes + numReceive; - quasiRegionGIDs.resize(numLocalRegionNodes*numDofsPerNode); + const LO numLocalCompositeNodes = lNodesPerDim[0] * lNodesPerDim[1] * lNodesPerDim[2]; + numLocalRegionNodes = numLocalCompositeNodes + numReceive; + quasiRegionGIDs.resize(numLocalRegionNodes * numDofsPerNode); quasiRegionCoordGIDs.resize(numLocalRegionNodes); rNodesPerDim[0] = lNodesPerDim[0]; rNodesPerDim[1] = lNodesPerDim[1]; rNodesPerDim[2] = lNodesPerDim[2]; - if(leftBC == 0) {rNodesPerDim[0] += 1;} - if(frontBC == 0) {rNodesPerDim[1] += 1;} - if(bottomBC == 0) {rNodesPerDim[2] += 1;} + if (leftBC == 0) { + rNodesPerDim[0] += 1; + } + if (frontBC == 0) { + rNodesPerDim[1] += 1; + } + if (bottomBC == 0) { + rNodesPerDim[2] += 1; + } // Using receiveGIDs, rNodesPerDim and numLocalRegionNodes, build quasi-region row map // This will potentially be done by the application or in a MueLu interface but for now // let us keep it in this utility function. 
LO interfaceCount = 0, compositeIdx = 0; Teuchos::Array regionIJK(3); - for(LO nodeRegionIdx = 0; nodeRegionIdx < numLocalRegionNodes; ++nodeRegionIdx) { - regionIJK[2] = nodeRegionIdx / (rNodesPerDim[1]*rNodesPerDim[0]); - LO tmp = nodeRegionIdx % (rNodesPerDim[1]*rNodesPerDim[0]); + for (LO nodeRegionIdx = 0; nodeRegionIdx < numLocalRegionNodes; ++nodeRegionIdx) { + regionIJK[2] = nodeRegionIdx / (rNodesPerDim[1] * rNodesPerDim[0]); + LO tmp = nodeRegionIdx % (rNodesPerDim[1] * rNodesPerDim[0]); regionIJK[1] = tmp / rNodesPerDim[0]; regionIJK[0] = tmp % rNodesPerDim[0]; - if( (regionIJK[0] == 0 && leftBC == 0) || - (regionIJK[1] == 0 && frontBC == 0) || - (regionIJK[2] == 0 && bottomBC == 0) ) { + if ((regionIJK[0] == 0 && leftBC == 0) || + (regionIJK[1] == 0 && frontBC == 0) || + (regionIJK[2] == 0 && bottomBC == 0)) { quasiRegionCoordGIDs[nodeRegionIdx] = receiveGIDs[interfaceCount]; - for(int dof = 0; dof < numDofsPerNode; ++dof) { - quasiRegionGIDs[nodeRegionIdx*numDofsPerNode + dof] = - receiveGIDs[interfaceCount]*numDofsPerNode + dof; + for (int dof = 0; dof < numDofsPerNode; ++dof) { + quasiRegionGIDs[nodeRegionIdx * numDofsPerNode + dof] = + receiveGIDs[interfaceCount] * numDofsPerNode + dof; } receiveLIDs[interfaceCount] = nodeRegionIdx; ++interfaceCount; } else { - compositeIdx = (regionIJK[2] + bottomBC - 1)*lNodesPerDim[1]*lNodesPerDim[0] - + (regionIJK[1] + frontBC - 1)*lNodesPerDim[0] - + (regionIJK[0] + leftBC - 1); + compositeIdx = (regionIJK[2] + bottomBC - 1) * lNodesPerDim[1] * lNodesPerDim[0] + (regionIJK[1] + frontBC - 1) * lNodesPerDim[0] + (regionIJK[0] + leftBC - 1); quasiRegionCoordGIDs[nodeRegionIdx] = nodeMap->getGlobalElement(compositeIdx); - for(int dof = 0; dof < numDofsPerNode; ++dof) { - quasiRegionGIDs[nodeRegionIdx*numDofsPerNode + dof] - = dofMap->getGlobalElement(compositeIdx*numDofsPerNode + dof); - compositeToRegionLIDs[compositeIdx*numDofsPerNode + dof] = nodeRegionIdx*numDofsPerNode + dof; + for (int dof = 0; dof < 
numDofsPerNode; ++dof) { + quasiRegionGIDs[nodeRegionIdx * numDofsPerNode + dof] = dofMap->getGlobalElement(compositeIdx * numDofsPerNode + dof); + compositeToRegionLIDs[compositeIdx * numDofsPerNode + dof] = nodeRegionIdx * numDofsPerNode + dof; } } } @@ -1062,17 +988,17 @@ void createRegionData(const int numDimensions, interfaceLIDsData.resize((sendGIDs.size() + receiveGIDs.size()) * numDofsPerNode); interfaceGIDs.resize((sendGIDs.size() + receiveGIDs.size()) * numDofsPerNode); using size_type = typename Teuchos::Array::size_type; - for(size_type nodeIdx = 0; nodeIdx < sendGIDs.size(); ++nodeIdx) { - for(int dof = 0; dof < numDofsPerNode; ++dof) { - LO dofIdx = nodeIdx*numDofsPerNode + dof; - interfaceGIDs[dofIdx] = sendGIDs[nodeIdx] * numDofsPerNode + dof; + for (size_type nodeIdx = 0; nodeIdx < sendGIDs.size(); ++nodeIdx) { + for (int dof = 0; dof < numDofsPerNode; ++dof) { + LO dofIdx = nodeIdx * numDofsPerNode + dof; + interfaceGIDs[dofIdx] = sendGIDs[nodeIdx] * numDofsPerNode + dof; interfaceLIDsData[dofIdx] = compositeToRegionLIDs[sendLIDs[nodeIdx] * numDofsPerNode + dof]; } } - for(size_type nodeIdx = 0; nodeIdx < receiveGIDs.size(); ++nodeIdx) { - for(int dof = 0; dof < numDofsPerNode; ++dof) { - LO dofIdx = nodeIdx*numDofsPerNode + dof; - interfaceGIDs[dofIdx + sendGIDs.size() * numDofsPerNode] = receiveGIDs[nodeIdx] * numDofsPerNode + dof; + for (size_type nodeIdx = 0; nodeIdx < receiveGIDs.size(); ++nodeIdx) { + for (int dof = 0; dof < numDofsPerNode; ++dof) { + LO dofIdx = nodeIdx * numDofsPerNode + dof; + interfaceGIDs[dofIdx + sendGIDs.size() * numDofsPerNode] = receiveGIDs[nodeIdx] * numDofsPerNode + dof; interfaceLIDsData[dofIdx + sendLIDs.size() * numDofsPerNode] = receiveLIDs[nodeIdx] * numDofsPerNode + dof; } } @@ -1087,7 +1013,7 @@ void createRegionData(const int numDimensions, interfaceGIDs.erase(std::unique(interfaceGIDs.begin(), interfaceGIDs.end()), interfaceGIDs.end()); -} // createRegionData +} // createRegionData template void 
MakeRegionPerGIDWithGhosts(const Teuchos::RCP >& nodeMap, @@ -1095,7 +1021,7 @@ void MakeRegionPerGIDWithGhosts(const Teuchos::RCP >& rowImport, const int maxRegPerGID, const LocalOrdinal numDofsPerNode, - const Teuchos::Array& lNodesPerDir, + const Teuchos::Array& lNodesPerDir, const Teuchos::Array& sendGIDs, const Teuchos::Array& sendPIDs, const Teuchos::Array& interfaceRegionLIDs, @@ -1104,41 +1030,41 @@ void MakeRegionPerGIDWithGhosts(const Teuchos::RCP > dofMap = rowImport->getSourceMap(); - const RCP > quasiRegionRowMap = rowImport->getTargetMap(); - const int myRank = dofMap->getComm()->getRank(); + const RCP > dofMap = rowImport->getSourceMap(); + const RCP > quasiRegionRowMap = rowImport->getTargetMap(); + const int myRank = dofMap->getComm()->getRank(); - RCP >regionsPerGID = - Xpetra::MultiVectorFactory::Build(dofMap, maxRegPerGID, false); + RCP > regionsPerGID = + Xpetra::MultiVectorFactory::Build(dofMap, maxRegPerGID, false); regionsPerGIDWithGhosts = - Xpetra::MultiVectorFactory::Build(quasiRegionRowMap, maxRegPerGID, false); + Xpetra::MultiVectorFactory::Build(quasiRegionRowMap, maxRegPerGID, false); - { // Scope for regionsPerGIDView + { // Scope for regionsPerGIDView Array > regionsPerGIDView(maxRegPerGID); - for(int regionIdx = 0; regionIdx < maxRegPerGID; ++regionIdx) { + for (int regionIdx = 0; regionIdx < maxRegPerGID; ++regionIdx) { regionsPerGIDView[regionIdx] = regionsPerGID->getDataNonConst(regionIdx); } // Initialize all entries to myRank in first column and to -1 in other columns - for(LO dofIdx = 0; dofIdx < lNodesPerDir[0]*lNodesPerDir[1]*lNodesPerDir[2]*numDofsPerNode; ++dofIdx) { + for (LO dofIdx = 0; dofIdx < lNodesPerDir[0] * lNodesPerDir[1] * lNodesPerDir[2] * numDofsPerNode; ++dofIdx) { regionsPerGIDView[0][dofIdx] = myRank; - for(int regionIdx = 1; regionIdx < maxRegPerGID; ++regionIdx) { + for (int regionIdx = 1; regionIdx < maxRegPerGID; ++regionIdx) { regionsPerGIDView[regionIdx][dofIdx] = -1; } } // Now loop over the sendGIDs 
array to fill entries with values in sendPIDs LO nodeIdx = 0; - for(LO sendIdx = 0; sendIdx < static_cast(sendPIDs.size()); ++sendIdx) { + for (LO sendIdx = 0; sendIdx < static_cast(sendPIDs.size()); ++sendIdx) { nodeIdx = nodeMap->getLocalElement(sendGIDs[sendIdx]); - for(int dof = 0; dof < numDofsPerNode; ++dof) { - LO dofIdx = nodeIdx*numDofsPerNode + dof; - for(int regionIdx = 1; regionIdx < maxRegPerGID; ++regionIdx) { - if(regionsPerGIDView[regionIdx][dofIdx] == -1) { + for (int dof = 0; dof < numDofsPerNode; ++dof) { + LO dofIdx = nodeIdx * numDofsPerNode + dof; + for (int regionIdx = 1; regionIdx < maxRegPerGID; ++regionIdx) { + if (regionsPerGIDView[regionIdx][dofIdx] == -1) { regionsPerGIDView[regionIdx][dofIdx] = sendPIDs[sendIdx]; break; } @@ -1152,23 +1078,22 @@ void MakeRegionPerGIDWithGhosts(const Teuchos::RCP::Build(quasiRegionRowMap, maxRegPerGID, false); interfaceGIDsMV->putScalar(Teuchos::OrdinalTraits::zero()); const LO numRegionInterfaceLIDs = static_cast(interfaceRegionLIDs.size()); - { // Scope for interfaceGIDsPerRegion + { // Scope for interfaceGIDsPerRegion Array > regionsPerGIDWithGhostsData(maxRegPerGID); Array > interfaceGIDsMVData(maxRegPerGID); - for(int regionIdx = 0; regionIdx < maxRegPerGID; ++regionIdx) { + for (int regionIdx = 0; regionIdx < maxRegPerGID; ++regionIdx) { regionsPerGIDWithGhostsData[regionIdx] = regionsPerGIDWithGhosts->getDataNonConst(regionIdx); - interfaceGIDsMVData[regionIdx] = interfaceGIDsMV->getDataNonConst(regionIdx); - for(LO idx = 0; idx < numRegionInterfaceLIDs; ++idx) { + interfaceGIDsMVData[regionIdx] = interfaceGIDsMV->getDataNonConst(regionIdx); + for (LO idx = 0; idx < numRegionInterfaceLIDs; ++idx) { LO LID = interfaceRegionLIDs[idx]; - if(regionsPerGIDWithGhostsData[regionIdx][LID] == myRank) { + if (regionsPerGIDWithGhostsData[regionIdx][LID] == myRank) { interfaceGIDsMVData[regionIdx][LID] = regionRowMap->getGlobalElement(LID); } } } - } -} // MakeRegionPerGIDWithGhosts +} // 
MakeRegionPerGIDWithGhosts /*! \brief Extract list of region GIDs of all interface DOFs from the region row map @@ -1179,13 +1104,12 @@ refert to interface DOFs, so we can grab them and stick them into the list of \c template void ExtractListOfInterfaceRegionGIDs( Teuchos::RCP > regionRowMap, - const Teuchos::Array& interfaceRegionLIDs, Teuchos::Array& interfaceRegionGIDs) -{ + const Teuchos::Array& interfaceRegionLIDs, Teuchos::Array& interfaceRegionGIDs) { interfaceRegionGIDs.resize(interfaceRegionLIDs.size()); - for(LocalOrdinal interfaceIdx = 0; interfaceIdx < static_cast(interfaceRegionLIDs.size()); ++interfaceIdx) { + for (LocalOrdinal interfaceIdx = 0; interfaceIdx < static_cast(interfaceRegionLIDs.size()); ++interfaceIdx) { interfaceRegionGIDs[interfaceIdx] = - regionRowMap->getGlobalElement(interfaceRegionLIDs[interfaceIdx]); + regionRowMap->getGlobalElement(interfaceRegionLIDs[interfaceIdx]); } -} // ExtractListOfInterfaceRegionGIDs +} // ExtractListOfInterfaceRegionGIDs -#endif // MUELU_SETUPREGIONUTILITIES_HPP +#endif // MUELU_SETUPREGIONUTILITIES_HPP diff --git a/packages/muelu/research/regionMG/src/SetupRegionVector_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionVector_def.hpp index a935f62922cc..e3dc90fcec28 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionVector_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionVector_def.hpp @@ -62,9 +62,9 @@ #include #include -using Teuchos::RCP; -using Teuchos::ArrayRCP; using Teuchos::Array; +using Teuchos::ArrayRCP; +using Teuchos::RCP; /*! \brief Transform composite vector to regional layout * @@ -72,14 +72,13 @@ using Teuchos::Array; * 1. import it into an auxiliary vector in the quasiRegional layout * 2. 
replace the quasiRegional map of the auxiliary vector with the regional map */ -template -void compositeToRegional(RCP > compVec, ///< Vector in composite layout [in] - RCP >& quasiRegVecs, ///< Vector in quasiRegional layout [in/out] - RCP >& regVecs, ///< Vector in regional layout [in/out] - const RCP > revisedRowMap, ///< revised row maps in region layout [in] - const RCP > rowImport ///< row importer in region layout [in] - ) -{ +template +void compositeToRegional(RCP > compVec, ///< Vector in composite layout [in] + RCP >& quasiRegVecs, ///< Vector in quasiRegional layout [in/out] + RCP >& regVecs, ///< Vector in regional layout [in/out] + const RCP > revisedRowMap, ///< revised row maps in region layout [in] + const RCP > rowImport ///< row importer in region layout [in] +) { #include "Xpetra_UseShortNames.hpp" // quasiRegional layout @@ -90,12 +89,12 @@ void compositeToRegional(RCPreplaceMap(revisedRowMap); return; -} // compositeToRegional +} // compositeToRegional /*! \brief Transform composite vector to regional layout * @@ -103,14 +102,13 @@ void compositeToRegional(RCP -void compositeToRegional(RCP > compVec, ///< Vector in composite layout [in] - RCP >& quasiRegVecs, ///< Vector in quasiRegional layout [in/out] - RCP >& regVecs, ///< Vector in regional layout [in/out] - const RCP > revisedRowMap, ///< revised row maps in region layout [in] - const RCP > rowImport ///< row importer in region layout [in] - ) -{ +template +void compositeToRegional(RCP > compVec, ///< Vector in composite layout [in] + RCP >& quasiRegVecs, ///< Vector in quasiRegional layout [in/out] + RCP >& regVecs, ///< Vector in regional layout [in/out] + const RCP > revisedRowMap, ///< revised row maps in region layout [in] + const RCP > rowImport ///< row importer in region layout [in] +) { #include "Xpetra_UseShortNames.hpp" // quasiRegional layout @@ -121,13 +119,12 @@ void compositeToRegional(RCPreplaceMap(revisedRowMap); return; -} // compositeToRegional - +} // 
compositeToRegional /*! \brief Transform regional vector to composite layout * @@ -139,12 +136,11 @@ void compositeToRegional(RCP -void regionalToComposite(const RCP >& regVec, ///< Vector in region layout [in] - RCP > compVec, ///< Vector in composite layout [in/out] - const RCP > rowImport ///< row importer in region layout [in] - ) -{ +template +void regionalToComposite(const RCP >& regVec, ///< Vector in region layout [in] + RCP > compVec, ///< Vector in composite layout [in/out] + const RCP > rowImport ///< row importer in region layout [in] +) { /* Let's fake an ADD combine mode that also adds local values by * 1. exporting quasiRegional vectors to auxiliary composite vectors (1 per group) * 2. add all auxiliary vectors together @@ -163,34 +159,33 @@ void regionalToComposite(const RCP quasiRegVec; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 2 - quasiRegVec"))); - quasiRegVec = regVec; - TEUCHOS_ASSERT(Teuchos::nonnull(quasiRegVec)); - quasiRegVec->replaceMap(rowImport->getTargetMap()); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 2 - quasiRegVec"))); + quasiRegVec = regVec; + TEUCHOS_ASSERT(Teuchos::nonnull(quasiRegVec)); + quasiRegVec->replaceMap(rowImport->getTargetMap()); - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 3 - partialCompVec"))); + tm = Teuchos::null; + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 3 - partialCompVec"))); - RCP partialCompVec = VectorFactory::Build(rowImport->getSourceMap(), true); - TEUCHOS_ASSERT(Teuchos::nonnull(partialCompVec)); - TEUCHOS_ASSERT(partialCompVec->getLocalLength() == compVecLocalLength); - partialCompVec->doExport(*quasiRegVec, *(rowImport), Xpetra::ADD); + RCP partialCompVec = VectorFactory::Build(rowImport->getSourceMap(), true); + TEUCHOS_ASSERT(Teuchos::nonnull(partialCompVec)); + TEUCHOS_ASSERT(partialCompVec->getLocalLength() == compVecLocalLength); + 
partialCompVec->doExport(*quasiRegVec, *(rowImport), Xpetra::ADD); - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 4 - compVec->sumIntoLocalValue"))); + tm = Teuchos::null; + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 4 - compVec->sumIntoLocalValue"))); - ArrayRCP partialCompVecData = partialCompVec->getData(0); - ArrayRCP compVecData = compVec->getDataNonConst(0); - for(size_t entryIdx = 0; entryIdx < compVecLocalLength; ++entryIdx) { - compVecData[entryIdx] += partialCompVecData[entryIdx]; - } + ArrayRCP partialCompVecData = partialCompVec->getData(0); + ArrayRCP compVecData = compVec->getDataNonConst(0); + for (size_t entryIdx = 0; entryIdx < compVecLocalLength; ++entryIdx) { + compVecData[entryIdx] += partialCompVecData[entryIdx]; + } - tm = Teuchos::null; + tm = Teuchos::null; } return; -} // regionalToComposite - +} // regionalToComposite /*! \brief Transform regional vector to composite layout * @@ -202,12 +197,11 @@ void regionalToComposite(const RCP -void regionalToComposite(const RCP >& regVec, ///< Vector in region layout [in] - RCP > compVec, ///< Vector in composite layout [in/out] - const RCP > rowImport ///< row importer in region layout [in] - ) -{ +template +void regionalToComposite(const RCP >& regVec, ///< Vector in region layout [in] + RCP > compVec, ///< Vector in composite layout [in/out] + const RCP > rowImport ///< row importer in region layout [in] +) { /* Let's fake an ADD combine mode that also adds local values by * 1. exporting quasiRegional vectors to auxiliary composite vectors (1 per group) * 2. 
add all auxiliary vectors together @@ -226,7 +220,7 @@ void regionalToComposite(const RCP quasiRegVec; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 2 - quasiRegVec"))); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("regionalToComposite: 2 - quasiRegVec"))); quasiRegVec = regVec; TEUCHOS_ASSERT(Teuchos::nonnull(quasiRegVec)); quasiRegVec->replaceMap(rowImport->getTargetMap()); @@ -234,8 +228,7 @@ void regionalToComposite(const RCP partialCompVec - = MultiVectorFactory::Build(rowImport->getSourceMap(), quasiRegVec->getNumVectors(), true); + RCP partialCompVec = MultiVectorFactory::Build(rowImport->getSourceMap(), quasiRegVec->getNumVectors(), true); TEUCHOS_ASSERT(Teuchos::nonnull(partialCompVec)); TEUCHOS_ASSERT(partialCompVec->getLocalLength() == compVecLocalLength); partialCompVec->doExport(*quasiRegVec, *(rowImport), Xpetra::ADD); @@ -243,10 +236,10 @@ void regionalToComposite(const RCPsumIntoLocalValue"))); - for(LO vecIdx = 0; vecIdx < static_cast(partialCompVec->getNumVectors()); ++vecIdx) { + for (LO vecIdx = 0; vecIdx < static_cast(partialCompVec->getNumVectors()); ++vecIdx) { ArrayRCP partialCompVecData = partialCompVec->getData(vecIdx); - ArrayRCP compVecData = compVec->getDataNonConst(vecIdx); - for(size_t entryIdx = 0; entryIdx < compVecLocalLength; ++entryIdx) { + ArrayRCP compVecData = compVec->getDataNonConst(vecIdx); + for (size_t entryIdx = 0; entryIdx < compVecLocalLength; ++entryIdx) { compVecData[entryIdx] += partialCompVecData[entryIdx]; } } @@ -255,7 +248,7 @@ void regionalToComposite(const RCP void sumInterfaceValues(RCP >& regVec, - const RCP > revisedRowMap,///< revised row maps in region layout [in] - const RCP > rowImport ///< row importer in region layout [in]) - ) -{ + const RCP > revisedRowMap, ///< revised row maps in region layout [in] + const RCP > rowImport ///< row importer in region layout [in]) +) { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; @@ -292,8 +284,7 @@ void 
sumInterfaceValues(RCP void sumInterfaceValues(RCP >& regVec, - const RCP > revisedRowMap,///< revised row maps in region layout [in] - const RCP > rowImport ///< row importer in region layout [in]) - ) -{ + const RCP > revisedRowMap, ///< revised row maps in region layout [in] + const RCP > rowImport ///< row importer in region layout [in]) +) { #include "Xpetra_UseShortNames.hpp" using Teuchos::TimeMonitor; @@ -330,8 +320,7 @@ void sumInterfaceValues(RCP -void scaleInterfaceDOFs(RCP >& regVec, ///< Vector to be scaled - const RCP >& scalingFactors, ///< Vector with scaling factors - bool inverseScaling ///< Divide by scaling factors (yes/no?) - ) -{ - using Vector = Xpetra::Vector; +void scaleInterfaceDOFs(RCP >& regVec, ///< Vector to be scaled + const RCP >& scalingFactors, ///< Vector with scaling factors + bool inverseScaling ///< Divide by scaling factors (yes/no?) +) { + using Vector = Xpetra::Vector; using VectorFactory = Xpetra::VectorFactory; const Scalar zero = Teuchos::ScalarTraits::zero(); - const Scalar one = Teuchos::ScalarTraits::one(); + const Scalar one = Teuchos::ScalarTraits::one(); - if (inverseScaling) - { + if (inverseScaling) { RCP inverseScalingFactors = VectorFactory::Build(scalingFactors->getMap()); inverseScalingFactors->reciprocal(*scalingFactors); regVec->elementWiseMultiply(one, *regVec, *inverseScalingFactors, zero); - } - else - { + } else { regVec->elementWiseMultiply(one, *regVec, *scalingFactors, zero); } -}// scaleInterfaceDOFs +} // scaleInterfaceDOFs -#endif // MUELU_SETUPREGIONVECTOR_DEF_HPP +#endif // MUELU_SETUPREGIONVECTOR_DEF_HPP diff --git a/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp index 18616ae101b3..44d5347377a3 100644 --- a/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp +++ b/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp @@ -47,44 +47,43 @@ #include "SetupRegionHierarchy_def.hpp" +using 
Teuchos::Array; using Teuchos::RCP; using Teuchos::rcp; -using Teuchos::Array; //! Recursive multigrid cycle (V or W) in region fashion -template -void MgCycle(const int levelID, ///< ID of current level - const std::string cycleType, - RCP > & regHierarchy, - RCP >& fineRegX, ///< solution - RCP > fineRegB, ///< right hand side - Array > smootherParams, ///< region smoother parameter list - bool& zeroInitGuess, - RCP coarseSolverData = Teuchos::null, - RCP hierarchyData = Teuchos::null) -{ +template +void MgCycle(const int levelID, ///< ID of current level + const std::string cycleType, + RCP>& regHierarchy, + RCP>& fineRegX, ///< solution + RCP> fineRegB, ///< right hand side + Array> smootherParams, ///< region smoother parameter list + bool& zeroInitGuess, + RCP coarseSolverData = Teuchos::null, + RCP hierarchyData = Teuchos::null) { #include "MueLu_UseShortNames.hpp" using Teuchos::TimeMonitor; const Scalar SC_ZERO = Teuchos::ScalarTraits::zero(); - const Scalar SC_ONE = Teuchos::ScalarTraits::one(); + const Scalar SC_ONE = Teuchos::ScalarTraits::one(); - RCP level = regHierarchy->GetLevel(levelID); - RCP regMatrix = level->Get >("A", MueLu::NoFactory::get()); - RCP regRowMap = regMatrix->getRowMap(); - RCP > regRowImporter = level->Get > >("rowImport"); - RCP > regInterfaceScalings = level->Get > >("regInterfaceScalings"); + RCP level = regHierarchy->GetLevel(levelID); + RCP regMatrix = level->Get>("A", MueLu::NoFactory::get()); + RCP regRowMap = regMatrix->getRowMap(); + RCP> regRowImporter = level->Get>>("rowImport"); + RCP> regInterfaceScalings = level->Get>>("regInterfaceScalings"); // Setup recursive cycling to represent either V- or W-cycles int cycleCount = 1; - if (cycleType == "W" && levelID > 0 ){ // W cycle and not on finest level. + if (cycleType == "W" && levelID > 0) { // W cycle and not on finest level. 
const std::string coarseSolverType = coarseSolverData->get("coarse solver type"); - if (coarseSolverType == "direct" && levelID == regHierarchy->GetNumLevels()-2 ) // Only call coarse level solve once if direct solve + if (coarseSolverType == "direct" && levelID == regHierarchy->GetNumLevels() - 2) // Only call coarse level solve once if direct solve cycleCount = 1; else cycleCount = 2; } - if (levelID < regHierarchy->GetNumLevels() - 1) // fine or intermediate levels + if (levelID < regHierarchy->GetNumLevels() - 1) // fine or intermediate levels { // extract data from hierarchy parameterlist std::string levelName("level" + std::to_string(levelID)); @@ -107,8 +106,8 @@ void MgCycle(const int levelID, ///< ID of current level tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MgCycle: 2 - compute residual"))); RCP regRes; - if(useCachedVectors) { - regRes = levelList.get >("residual"); + if (useCachedVectors) { + regRes = levelList.get>("residual"); } else { regRes = VectorFactory::Build(regRowMap, true); } @@ -127,49 +126,47 @@ void MgCycle(const int levelID, ///< ID of current level RCP coarseRegB; { - RCP levelCoarse = regHierarchy->GetLevel(levelID+1); - RCP regProlongCoarse = levelCoarse->Get >("P", MueLu::NoFactory::get()); - RCP > regRowMapCoarse = regProlongCoarse->getColMap(); + RCP levelCoarse = regHierarchy->GetLevel(levelID + 1); + RCP regProlongCoarse = levelCoarse->Get>("P", MueLu::NoFactory::get()); + RCP> regRowMapCoarse = regProlongCoarse->getColMap(); // Get pre-communicated communication patterns for the fast MatVec - const ArrayRCP regionInterfaceLIDs = smootherParams[levelID+1]->get>("Fast MatVec: interface LIDs"); - const RCP regionInterfaceImporter = smootherParams[levelID+1]->get>("Fast MatVec: interface importer"); + const ArrayRCP regionInterfaceLIDs = smootherParams[levelID + 1]->get>("Fast MatVec: interface LIDs"); + const RCP regionInterfaceImporter = smootherParams[levelID + 1]->get>("Fast MatVec: interface importer"); coarseRegX = 
VectorFactory::Build(regRowMapCoarse, true); coarseRegB = VectorFactory::Build(regRowMapCoarse, true); regProlongCoarse->apply(*regRes, *coarseRegB, Teuchos::TRANS, SC_ONE, SC_ZERO, true, regionInterfaceImporter, regionInterfaceLIDs); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regRes[j]->getMap())); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap())); + // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regRes[j]->getMap())); + // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap())); } - tm = Teuchos::null; + tm = Teuchos::null; bool coarseZeroInitGuess = true; - for(int cycle = 0; cycle < cycleCount; cycle++) - { - + for (int cycle = 0; cycle < cycleCount; cycle++) { // Call V-cycle recursively - MgCycle(levelID+1, cycleType, regHierarchy, - coarseRegX, coarseRegB, - smootherParams, coarseZeroInitGuess, coarseSolverData, hierarchyData); + MgCycle(levelID + 1, cycleType, regHierarchy, + coarseRegX, coarseRegB, + smootherParams, coarseZeroInitGuess, coarseSolverData, hierarchyData); - } //cycleCount + } // cycleCount tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MgCycle: 6 - transfer coarse to fine"))); // Transfer coarse level correction to fine level RCP regCorrection; { - RCP levelCoarse = regHierarchy->GetLevel(levelID+1); - RCP regProlongCoarse = levelCoarse->Get >("P", MueLu::NoFactory::get()); + RCP levelCoarse = regHierarchy->GetLevel(levelID + 1); + RCP regProlongCoarse = levelCoarse->Get>("P", MueLu::NoFactory::get()); // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = smootherParams[levelID]->get>("Fast MatVec: interface LIDs"); - const RCP regionInterfaceImporter = smootherParams[levelID]->get>("Fast MatVec: interface importer"); + const RCP regionInterfaceImporter = smootherParams[levelID]->get>("Fast MatVec: interface importer"); regCorrection = VectorFactory::Build(regRowMap, true); 
regProlongCoarse->apply(*coarseRegX, *regCorrection, Teuchos::NO_TRANS, SC_ONE, SC_ZERO, false, regionInterfaceImporter, regionInterfaceLIDs); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegX[j]->getMap())); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap())); + // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegX[j]->getMap())); + // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap())); } tm = Teuchos::null; @@ -189,7 +186,6 @@ void MgCycle(const int levelID, ///< ID of current level tm = Teuchos::null; } else { - // Coarsest grid solve RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); @@ -202,13 +198,13 @@ void MgCycle(const int levelID, ///< ID of current level const std::string coarseSolverType = coarseSolverData->get("coarse solver type"); if (coarseSolverType == "smoother") { smootherApply(smootherParams[levelID], fineRegX, fineRegB, regMatrix, - regRowMap, regRowImporter, zeroInitGuess); + regRowMap, regRowImporter, zeroInitGuess); } else { zeroInitGuess = false; // First get the Xpetra vectors from region to composite format - RCP coarseRowMap = coarseSolverData->get >("compCoarseRowMap"); - RCP compX = VectorFactory::Build(coarseRowMap, true); - RCP compRhs = VectorFactory::Build(coarseRowMap, true); + RCP coarseRowMap = coarseSolverData->get>("compCoarseRowMap"); + RCP compX = VectorFactory::Build(coarseRowMap, true); + RCP compRhs = VectorFactory::Build(coarseRowMap, true); { RCP inverseInterfaceScaling = VectorFactory::Build(regInterfaceScalings->getMap()); inverseInterfaceScaling->reciprocal(*regInterfaceScalings); @@ -217,15 +213,14 @@ void MgCycle(const int levelID, ///< ID of current level regionalToComposite(fineRegB, compRhs, regRowImporter); } - if (coarseSolverType == "direct") - { + if (coarseSolverType == "direct") { #if defined(HAVE_MUELU_AMESOS2) - using DirectCoarseSolver = Amesos2::Solver, 
Tpetra::MultiVector >; - RCP coarseSolver = coarseSolverData->get >("direct solver object"); + using DirectCoarseSolver = Amesos2::Solver, Tpetra::MultiVector>; + RCP coarseSolver = coarseSolverData->get>("direct solver object"); - TEUCHOS_TEST_FOR_EXCEPT_MSG(coarseRowMap->lib()!=Xpetra::UseTpetra, - "Coarse solver requires Tpetra/Amesos2 stack."); + TEUCHOS_TEST_FOR_EXCEPT_MSG(coarseRowMap->lib() != Xpetra::UseTpetra, + "Coarse solver requires Tpetra/Amesos2 stack."); TEUCHOS_ASSERT(!coarseSolver.is_null()); // using Utilities = MueLu::Utilities; @@ -234,8 +229,8 @@ void MgCycle(const int levelID, ///< ID of current level // we could also implement a similar Epetra branch using Tpetra_MultiVector = Tpetra::MultiVector; - // *fos << "Attempting to use Amesos2 to solve the coarse grid problem" << std::endl; - RCP tX = Utilities::MV2NonConstTpetraMV2(*compX); + // *fos << "Attempting to use Amesos2 to solve the coarse grid problem" << std::endl; + RCP tX = Utilities::MV2NonConstTpetraMV2(*compX); RCP tB = Utilities::MV2TpetraMV(compRhs); /* Solve! @@ -249,10 +244,12 @@ void MgCycle(const int levelID, ///< ID of current level */ if (not coarseSolver->getStatus().symbolicFactorizationDone()) *fos << "Symbolic factorization should have been done during hierarchy setup, " - "but actually is missing. Anyway ... just do it right now." << std::endl; + "but actually is missing. Anyway ... just do it right now." + << std::endl; if (not coarseSolver->getStatus().numericFactorizationDone()) *fos << "Numeric factorization should have been done during hierarchy setup, " - "but actually is missing. Anyway ... just do it right now." << std::endl; + "but actually is missing. Anyway ... just do it right now." 
+ << std::endl; coarseSolver->solve(tX.ptr(), tB.ptr()); #else *fos << "+++++++++++++++++++++++++++ WARNING +++++++++++++++++++++++++\n" @@ -261,8 +258,7 @@ void MgCycle(const int levelID, ///< ID of current level << "+++++++++++++++++++++++++++ WARNING +++++++++++++++++++++++++" << std::endl; #endif - } - else if (coarseSolverType == "amg") // use AMG as coarse level solver + } else if (coarseSolverType == "amg") // use AMG as coarse level solver { const bool coarseSolverRebalance = coarseSolverData->get("coarse solver rebalance"); @@ -270,22 +266,22 @@ void MgCycle(const int levelID, ///< ID of current level RCP amgHierarchy = coarseSolverData->get>("amg hierarchy object"); // Run a single V-cycle - if(coarseSolverRebalance==false){ + if (coarseSolverRebalance == false) { amgHierarchy->Iterate(*compRhs, *compX, 1, true); } else { #if defined(HAVE_MUELU_ZOLTAN2) && defined(HAVE_MPI) - RCP rebalanceImporter = coarseSolverData->get >("rebalanceImporter"); + RCP rebalanceImporter = coarseSolverData->get>("rebalanceImporter"); // TODO: These vectors could be cached to improve performance RCP rebalancedRhs = VectorFactory::Build(rebalanceImporter->getTargetMap()); - RCP rebalancedX = VectorFactory::Build(rebalanceImporter->getTargetMap(), true); + RCP rebalancedX = VectorFactory::Build(rebalanceImporter->getTargetMap(), true); rebalancedRhs->doImport(*compRhs, *rebalanceImporter, Xpetra::INSERT); rebalancedRhs->replaceMap(rebalancedRhs->getMap()->removeEmptyProcesses()); rebalancedX->replaceMap(rebalancedX->getMap()->removeEmptyProcesses()); - if(!amgHierarchy.is_null()){ + if (!amgHierarchy.is_null()) { amgHierarchy->Iterate(*rebalancedRhs, *rebalancedX, 1, true); } @@ -295,9 +291,7 @@ void MgCycle(const int levelID, ///< ID of current level amgHierarchy->Iterate(*compRhs, *compX, 1, true); #endif } - } - else - { + } else { TEUCHOS_TEST_FOR_EXCEPT_MSG(false, "Unknown coarse solver type."); } @@ -312,39 +306,37 @@ void MgCycle(const int levelID, ///< ID of current 
level } return; -} // MgCycle +} // MgCycle //! Adapter that uses composite vectors and a region hierarchy // and performs a region MG cycle on them. -template +template void RegionMgCycleAdapter(const std::string cycleType, - RCP > & regHierarchy, - RCP >& X, ///< solution - RCP > B, ///< right hand side - Array > smootherParams, ///< region smoother parameter list - bool& zeroInitGuess, - RCP coarseSolverData = Teuchos::null, - RCP hierarchyData = Teuchos::null) -{ - + RCP>& regHierarchy, + RCP>& X, ///< solution + RCP> B, ///< right hand side + Array> smootherParams, ///< region smoother parameter list + bool& zeroInitGuess, + RCP coarseSolverData = Teuchos::null, + RCP hierarchyData = Teuchos::null) { using LO = LocalOrdinal; using GO = GlobalOrdinal; using NO = Node; using SC = Scalar; - using Level = MueLu::Level; - using Map = Xpetra::Map; - using Import = Xpetra::Import; - using Matrix = Xpetra::Matrix; - using Vector = Xpetra::Vector; - using VectorFactory = Xpetra::VectorFactory; + using Level = MueLu::Level; + using Map = Xpetra::Map; + using Import = Xpetra::Import; + using Matrix = Xpetra::Matrix; + using Vector = Xpetra::Vector; + using VectorFactory = Xpetra::VectorFactory; // Extract some info from the hierarchy // to convert vectors from composite to regional and back RCP level0 = regHierarchy->GetLevel(0); - RCP rowImport = level0->Get >("rowImport"); - RCP regInterfaceScalings = level0->Get >("regInterfaceScalings"); - RCP regMat = level0->Get >("A"); + RCP rowImport = level0->Get>("rowImport"); + RCP regInterfaceScalings = level0->Get>("regInterfaceScalings"); + RCP regMat = level0->Get>("A"); RCP revisedRowMap = regMat->getRowMap(); // Compute region vectors for B and X @@ -359,66 +351,64 @@ void RegionMgCycleAdapter(const std::string cycleType, revisedRowMap, rowImport); MgCycle(0, cycleType, regHierarchy, - regX, regB, - smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); + regX, regB, + smootherParams, zeroInitGuess, 
coarseSolverData, hierarchyData); // Bring solution back to composite format scaleInterfaceDOFs(regX, regInterfaceScalings, true); regionalToComposite(regX, X, rowImport); -} // RegionMgCycleAdapter +} // RegionMgCycleAdapter // Solve via Richardson iteration with region MG preconditioning, hand in matrix in region format template void solveRegionProblemRichardson(const double tol, const bool scaleResidualHist, const int maxIts, const std::string cycleType, const std::string convergenceLog, RCP& coarseSolverData, - Array >& smootherParams, + Array>& smootherParams, RCP hierarchyData, - RCP > & regHierarchy, - RCP >& X, - RCP >& B) { - + RCP>& regHierarchy, + RCP>& X, + RCP>& B) { using LO = LocalOrdinal; using GO = GlobalOrdinal; using NO = Node; using SC = Scalar; - using Map = Xpetra::Map; - using Import = Xpetra::Import; - using Matrix = Xpetra::Matrix; - using Vector = Xpetra::Vector; - using VectorFactory = Xpetra::VectorFactory; + using Map = Xpetra::Map; + using Import = Xpetra::Import; + using Matrix = Xpetra::Matrix; + using Vector = Xpetra::Vector; + using VectorFactory = Xpetra::VectorFactory; using Level = MueLu::Level; - using STS = Teuchos::ScalarTraits; + using STS = Teuchos::ScalarTraits; using magnitude_type = typename STS::magnitudeType; const Scalar SC_zero = STS::zero(); const Scalar SC_one = STS::one(); // we start by extracting some basic data from the hierarchy - RCP level0 = regHierarchy->GetLevel(0); - RCP regMat = level0->Get >("A"); - RCP revisedRowMap = regMat->getRowMap(); - RCP rowImport = level0->Get >("rowImport"); - RCP dofMap = X->getMap(); - const int myRank = dofMap->getComm()->getRank(); + RCP level0 = regHierarchy->GetLevel(0); + RCP regMat = level0->Get>("A"); + RCP revisedRowMap = regMat->getRowMap(); + RCP rowImport = level0->Get>("rowImport"); + RCP dofMap = X->getMap(); + const int myRank = dofMap->getComm()->getRank(); // Instead of checking each time for rank, create a rank 0 stream RCP fancy = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // Prepare output of residual norm to file RCP log; - if (myRank == 0) - { - log = rcp(new std::ofstream(convergenceLog.c_str())); - (*log) << "# num procs = " << dofMap->getComm()->getSize() << "\n" - << "# iteration | res-norm (scaled=" << scaleResidualHist << ")\n" - << "#\n"; - *log << std::setprecision(16) << std::scientific; - } + if (myRank == 0) { + log = rcp(new std::ofstream(convergenceLog.c_str())); + (*log) << "# num procs = " << dofMap->getComm()->getSize() << "\n" + << "# iteration | res-norm (scaled=" << scaleResidualHist << ")\n" + << "#\n"; + *log << std::setprecision(16) << std::scientific; + } // Print type of residual norm to the screen out << "Using region solver" << std::endl; @@ -427,7 +417,7 @@ void solveRegionProblemRichardson(const double tol, const bool scaleResidualHist else out << "Using unscaled residual norm." << std::endl; - TEUCHOS_TEST_FOR_EXCEPT_MSG(!(regHierarchy->GetNumLevels()>0), "We require numLevel > 0. Probably, numLevel has not been set, yet."); + TEUCHOS_TEST_FOR_EXCEPT_MSG(!(regHierarchy->GetNumLevels() > 0), "We require numLevel > 0. Probably, numLevel has not been set, yet."); // We first use the non-level container variables to setup the fine grid problem. 
// This is ok since the initial setup just mimics the application and the outer @@ -439,7 +429,7 @@ void solveRegionProblemRichardson(const double tol, const bool scaleResidualHist // Composite residual vector RCP compRes = VectorFactory::Build(dofMap, true); - compRes = VectorFactory::Build(dofMap, true); + compRes = VectorFactory::Build(dofMap, true); // transform composite vectors to regional layout Teuchos::RCP quasiRegX; @@ -462,17 +452,16 @@ void solveRegionProblemRichardson(const double tol, const bool scaleResidualHist // SWITCH TO RECURSIVE STYLE --> USE LEVEL CONTAINER VARIABLES ///////////////////////////////////////////////////////////////////////// - // Richardson iterations magnitude_type normResIni = Teuchos::ScalarTraits::zero(); - const int old_precision = std::cout.precision(); + const int old_precision = std::cout.precision(); std::cout << std::setprecision(8) << std::scientific; int cycle = 0; // Get Stuff out of Hierarchy - RCP level = regHierarchy->GetLevel(0); - RCP regInterfaceScalings = level->Get >("regInterfaceScalings"); - bool zeroInitGuess = true; + RCP level = regHierarchy->GetLevel(0); + RCP regInterfaceScalings = level->Get>("regInterfaceScalings"); + bool zeroInitGuess = true; for (cycle = 0; cycle < maxIts; ++cycle) { regCorrect->putScalar(SC_zero); // check for convergence @@ -485,8 +474,12 @@ void solveRegionProblemRichardson(const double tol, const bool scaleResidualHist regionalToComposite(regRes, compRes, rowImport); typename Teuchos::ScalarTraits::magnitudeType normRes = compRes->norm2(); - if(cycle == 0) { normResIni = normRes; }// out << "NormResIni = " << normResIni << std::endl; } - if(scaleResidualHist) { normRes /= normResIni; } + if (cycle == 0) { + normResIni = normRes; + } // out << "NormResIni = " << normResIni << std::endl; } + if (scaleResidualHist) { + normRes /= normResIni; + } // Output current residual norm to screen (on proc 0 only) out << cycle << "\t" << normRes << std::endl; @@ -505,70 +498,67 @@ void 
solveRegionProblemRichardson(const double tol, const bool scaleResidualHist // std::cout << "regB->norm2() " << regRes->norm2() << std::endl; MgCycle(0, cycleType, regHierarchy, - regCorrect, regRes, - smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); + regCorrect, regRes, + smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); // std::cout << "regX->norm2() " << regCorrect->norm2() << std::endl; regX->update(SC_one, *regCorrect, SC_one); - } out << "Number of iterations performed for this solve: " << cycle << std::endl; std::cout << std::setprecision(old_precision); std::cout.unsetf(std::ios::fixed | std::ios::scientific); -} // solveRegionProblemRichardson +} // solveRegionProblemRichardson // Solve via Conjugate Gradient with region MG preconditioning, hand in matrix in composite format template void solveCompositeProblemPCG(const double tol, const bool scaleResidualHist, const int maxIts, const std::string cycleType, const std::string convergenceLog, RCP& coarseSolverData, - Array >& smootherParams, + Array>& smootherParams, RCP hierarchyData, - RCP > & regHierarchy, - RCP >& A, - RCP >& X, - RCP >& B) { - + RCP>& regHierarchy, + RCP>& A, + RCP>& X, + RCP>& B) { using LO = LocalOrdinal; using GO = GlobalOrdinal; using NO = Node; using SC = Scalar; - using Map = Xpetra::Map; - using Import = Xpetra::Import; - using Matrix = Xpetra::Matrix; - using Vector = Xpetra::Vector; - using VectorFactory = Xpetra::VectorFactory; + using Map = Xpetra::Map; + using Import = Xpetra::Import; + using Matrix = Xpetra::Matrix; + using Vector = Xpetra::Vector; + using VectorFactory = Xpetra::VectorFactory; using Level = MueLu::Level; - using STS = Teuchos::ScalarTraits; + using STS = Teuchos::ScalarTraits; using magnitude_type = typename STS::magnitudeType; const Scalar SC_zero = STS::zero(); const Scalar SC_one = STS::one(); // we start by extracting some basic data from the hierarchy - RCP level0 = regHierarchy->GetLevel(0); + RCP level0 = 
regHierarchy->GetLevel(0); RCP dofMap = X->getMap(); - const int myRank = dofMap->getComm()->getRank(); + const int myRank = dofMap->getComm()->getRank(); // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // Prepare output of residual norm to file RCP log; - if (myRank == 0) - { - log = rcp(new std::ofstream(convergenceLog.c_str())); - (*log) << "# num procs = " << dofMap->getComm()->getSize() << "\n" - << "# iteration | res-norm (scaled=" << scaleResidualHist << ")\n" - << "#\n"; - *log << std::setprecision(16) << std::scientific; - } + if (myRank == 0) { + log = rcp(new std::ofstream(convergenceLog.c_str())); + (*log) << "# num procs = " << dofMap->getComm()->getSize() << "\n" + << "# iteration | res-norm (scaled=" << scaleResidualHist << ")\n" + << "#\n"; + *log << std::setprecision(16) << std::scientific; + } // Print type of residual norm to the screen out << "Using CG solver" << std::endl; @@ -577,23 +567,23 @@ void solveCompositeProblemPCG(const double tol, const bool scaleResidualHist, co else out << "Using unscaled residual norm." << std::endl; - TEUCHOS_TEST_FOR_EXCEPT_MSG(!(regHierarchy->GetNumLevels()>0), "We require numLevel > 0. Probably, numLevel has not been set, yet."); + TEUCHOS_TEST_FOR_EXCEPT_MSG(!(regHierarchy->GetNumLevels() > 0), "We require numLevel > 0. 
Probably, numLevel has not been set, yet."); // PCG iterations const int old_precision = std::cout.precision(); std::cout << std::setprecision(8) << std::scientific; // Get Stuff out of Hierarchy - RCP level = regHierarchy->GetLevel(0); - RCP regInterfaceScalings = level->Get >("regInterfaceScalings"); - bool zeroInitGuess = true; + RCP level = regHierarchy->GetLevel(0); + RCP regInterfaceScalings = level->Get>("regInterfaceScalings"); + bool zeroInitGuess = true; // Set variables for iterations - int cycle = 0; - RCP Res = VectorFactory::Build(dofMap, true); - RCP Z = VectorFactory::Build(dofMap, true); - RCP P = VectorFactory::Build(dofMap, true); - RCP AP = VectorFactory::Build(dofMap, true); + int cycle = 0; + RCP Res = VectorFactory::Build(dofMap, true); + RCP Z = VectorFactory::Build(dofMap, true); + RCP P = VectorFactory::Build(dofMap, true); + RCP AP = VectorFactory::Build(dofMap, true); magnitude_type normResIni = Teuchos::ScalarTraits::zero(); magnitude_type normRes = Teuchos::ScalarTraits::zero(); @@ -603,9 +593,9 @@ void solveCompositeProblemPCG(const double tol, const bool scaleResidualHist, co Z->putScalar(SC_zero); RegionMgCycleAdapter(cycleType, regHierarchy, - Z, Res, - smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); - P->update(SC_one, *Z, SC_zero); // deep copy values of Z into P + Z, Res, + smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); + P->update(SC_one, *Z, SC_zero); // deep copy values of Z into P Scalar alpha = SC_zero, beta_old = SC_zero, beta_new = SC_zero, PAP = SC_zero; for (cycle = 0; cycle < maxIts; ++cycle) { @@ -613,19 +603,21 @@ void solveCompositeProblemPCG(const double tol, const bool scaleResidualHist, co PAP = P->dot(*AP); TEUCHOS_TEST_FOR_EXCEPTION(PAP <= SC_zero, std::runtime_error, - "At iteration " << (cycle) << " out of " << maxIts - << ", P.dot(AP) = " << PAP << " <= 0. 
This usually means that " - "the matrix A is not symmetric (Hermitian) positive definite."); + "At iteration " << (cycle) << " out of " << maxIts + << ", P.dot(AP) = " << PAP << " <= 0. This usually means that " + "the matrix A is not symmetric (Hermitian) positive definite."); beta_old = Res->dot(*Z); - alpha = beta_old / PAP; + alpha = beta_old / PAP; X->update(alpha, *P, SC_one); Res->update(-alpha, *AP, SC_one); // check for convergence { normRes = Res->norm2(); - if(scaleResidualHist) { normRes /= normResIni; } + if (scaleResidualHist) { + normRes /= normResIni; + } // Output current residual norm to screen (on proc 0 only) out << cycle << "\t" << normRes << std::endl; @@ -638,8 +630,8 @@ void solveCompositeProblemPCG(const double tol, const bool scaleResidualHist, co Z->putScalar(SC_zero); RegionMgCycleAdapter(cycleType, regHierarchy, - Z, Res, - smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); + Z, Res, + smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); beta_new = Res->dot(*Z); P->update(SC_one, *Z, (beta_new / beta_old)); @@ -649,52 +641,50 @@ void solveCompositeProblemPCG(const double tol, const bool scaleResidualHist, co std::cout << std::setprecision(old_precision); std::cout.unsetf(std::ios::fixed | std::ios::scientific); -} // solveCompositeProblemPCG +} // solveCompositeProblemPCG // Solve via Richardson iteration with region MG preconditioning, hand in matrix in composite format template void solveCompositeProblemRichardson(const double tol, const bool scaleResidualHist, const int maxIts, const std::string cycleType, const std::string convergenceLog, RCP& coarseSolverData, - Array >& smootherParams, + Array>& smootherParams, RCP hierarchyData, - RCP > & regHierarchy, - RCP >& A, - RCP >& X, - RCP >& B) { - + RCP>& regHierarchy, + RCP>& A, + RCP>& X, + RCP>& B) { using LO = LocalOrdinal; using GO = GlobalOrdinal; using NO = Node; using SC = Scalar; - using Map = Xpetra::Map; - using Import = Xpetra::Import; - using 
Matrix = Xpetra::Matrix; - using Vector = Xpetra::Vector; - using VectorFactory = Xpetra::VectorFactory; + using Map = Xpetra::Map; + using Import = Xpetra::Import; + using Matrix = Xpetra::Matrix; + using Vector = Xpetra::Vector; + using VectorFactory = Xpetra::VectorFactory; using Level = MueLu::Level; - using STS = Teuchos::ScalarTraits; + using STS = Teuchos::ScalarTraits; using magnitude_type = typename STS::magnitudeType; const Scalar SC_zero = STS::zero(); const Scalar SC_one = STS::one(); // we start by extracting some basic data from the hierarchy - RCP level0 = regHierarchy->GetLevel(0); + RCP level0 = regHierarchy->GetLevel(0); RCP dofMap = X->getMap(); - const int myRank = dofMap->getComm()->getRank(); + const int myRank = dofMap->getComm()->getRank(); // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // Prepare output of residual norm to file RCP log; - if (myRank == 0) - { + if (myRank == 0) { log = rcp(new std::ofstream(convergenceLog.c_str())); (*log) << "# num procs = " << dofMap->getComm()->getSize() << "\n" << "# iteration | res-norm (scaled=" << scaleResidualHist << ")\n" @@ -709,25 +699,25 @@ void solveCompositeProblemRichardson(const double tol, const bool scaleResidualH else out << "Using unscaled residual norm." << std::endl; - TEUCHOS_TEST_FOR_EXCEPT_MSG(!(regHierarchy->GetNumLevels()>0), "We require numLevel > 0. Probably, numLevel has not been set, yet."); + TEUCHOS_TEST_FOR_EXCEPT_MSG(!(regHierarchy->GetNumLevels() > 0), "We require numLevel > 0. 
Probably, numLevel has not been set, yet."); // Richardson iterations const int old_precision = std::cout.precision(); std::cout << std::setprecision(8) << std::scientific; // Set variables for iterations - int cycle = 0; - RCP Correct = VectorFactory::Build(dofMap, true); - RCP Res = VectorFactory::Build(dofMap, true); + int cycle = 0; + RCP Correct = VectorFactory::Build(dofMap, true); + RCP Res = VectorFactory::Build(dofMap, true); magnitude_type normResIni = Teuchos::ScalarTraits::zero(); magnitude_type normRes = Teuchos::ScalarTraits::zero(); // out << "X->norm2() " << X->norm2() << std::endl; // Get Stuff out of Hierarchy - RCP level = regHierarchy->GetLevel(0); - RCP regInterfaceScalings = level->Get >("regInterfaceScalings"); - bool zeroInitGuess = true; + RCP level = regHierarchy->GetLevel(0); + RCP regInterfaceScalings = level->Get>("regInterfaceScalings"); + bool zeroInitGuess = true; for (cycle = 0; cycle < maxIts; ++cycle) { Correct->putScalar(SC_zero); // check for convergence @@ -736,8 +726,12 @@ void solveCompositeProblemRichardson(const double tol, const bool scaleResidualH Res->update(SC_one, *B, SC_one); normRes = Res->norm2(); - if(cycle == 0) { normResIni = normRes; }// out << "NormResIni = " << normResIni << std::endl;} - if(scaleResidualHist) { normRes /= normResIni; } + if (cycle == 0) { + normResIni = normRes; + } // out << "NormResIni = " << normResIni << std::endl;} + if (scaleResidualHist) { + normRes /= normResIni; + } // Output current residual norm to screen (on proc 0 only) out << cycle << "\t" << normRes << std::endl; @@ -749,8 +743,8 @@ void solveCompositeProblemRichardson(const double tol, const bool scaleResidualH } RegionMgCycleAdapter(cycleType, regHierarchy, - Correct, Res, - smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); + Correct, Res, + smootherParams, zeroInitGuess, coarseSolverData, hierarchyData); X->update(SC_one, *Correct, SC_one); } @@ -758,6 +752,6 @@ void solveCompositeProblemRichardson(const 
double tol, const bool scaleResidualH std::cout << std::setprecision(old_precision); std::cout.unsetf(std::ios::fixed | std::ios::scientific); -} // solveCompositeProblemRichardson +} // solveCompositeProblemRichardson -#endif // MUELU_SOLVEREGIONHIERARCHY_DEF_HPP +#endif // MUELU_SOLVEREGIONHIERARCHY_DEF_HPP diff --git a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp index a2a3625d7101..35008bdb4fcc 100644 --- a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp +++ b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp @@ -88,12 +88,11 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp -#include // => This header defines Belos::TpetraOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::TpetraOp #endif - #ifdef HAVE_MUELU_CUDA #include "cuda_profiler_api.h" #endif @@ -126,75 +125,101 @@ #include "SetupRegionHierarchy_def.hpp" #include "SolveRegionHierarchy_def.hpp" - -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int argc, char *argv[]) { #include + using Teuchos::ArrayRCP; + using Teuchos::ParameterList; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ArrayRCP; using Teuchos::TimeMonitor; - using Teuchos::ParameterList; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= RCP > comm = Teuchos::DefaultComm::getComm(); // const int numRanks = comm->getSize(); - const int myRank = comm->getRank(); + const int myRank = comm->getRank(); // 
========================================================================= // Convenient definitions // ========================================================================= using STS = Teuchos::ScalarTraits; SC zero = STS::zero(), one = STS::one(); - using magnitude_type = typename Teuchos::ScalarTraits::magnitudeType; - using real_type = typename STS::coordinateType; - using RealValuedMultiVector = Xpetra::MultiVector; + using magnitude_type = typename Teuchos::ScalarTraits::magnitudeType; + using real_type = typename STS::coordinateType; + using RealValuedMultiVector = Xpetra::MultiVector; // ========================================================================= // Parameters initialization // ========================================================================= GO nx = 10, ny = 10, nz = 10; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = ""; clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); - std::string yamlFileName = ""; clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); - std::string solverType = "region"; clp.setOption("solverType", &solverType, "iterative solver to be used: (region | Richardson | CG)"); - std::string convergenceLog = "residual_norm.txt"; clp.setOption("convergence-log", &convergenceLog, "file in which the convergence history of the linear solver is stored"); - int maxIts = 200; clp.setOption("its", &maxIts, "maximum number of solver iterations"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - bool scaleResidualHist = true; clp.setOption("scale", "noscale", &scaleResidualHist, "scaled Krylov residual history"); - bool serialRandom = false; clp.setOption("use-serial-random", "no-use-serial-random", &serialRandom, "generate the random vector serially and then broadcast it"); - 
std::string cycleType = "V"; clp.setOption("cycleType", &cycleType, "{Multigrid cycle type. Possible values: V, W."); - std::string smootherType = "Jacobi"; clp.setOption("smootherType", &smootherType, "smoother to be used: (None | Jacobi | Gauss | Chebyshev)"); - int smootherIts = 2; clp.setOption("smootherIts", &smootherIts, "number of smoother iterations"); - double smootherDamp = 0.67; clp.setOption("smootherDamp", &smootherDamp, "damping parameter for the level smoother"); - double smootherChebyEigRatio = 2.0; clp.setOption("smootherChebyEigRatio", &smootherChebyEigRatio, "eigenvalue ratio max/min used to approximate the smallest eigenvalue for Chebyshev relaxation"); - double smootherChebyBoostFactor = 1.1; clp.setOption("smootherChebyBoostFactor", &smootherChebyBoostFactor, "boost factor for Chebyshev smoother"); - bool keepCoarseCoords = false; clp.setOption("keep-coarse-coords", "no-keep-coarse-coords", &keepCoarseCoords, "keep coordinates on coarsest level of region hierarchy"); - bool coarseSolverRebalance = false; clp.setOption("rebalance-coarse", "no-rebalance-coarse", &coarseSolverRebalance, "rebalance before AMG coarse grid solve"); - int rebalanceNumPartitions = -1; clp.setOption("numPartitions", &rebalanceNumPartitions, "number of partitions for rebalancing the coarse grid AMG solve"); - std::string coarseSolverType = "direct"; clp.setOption("coarseSolverType", &coarseSolverType, "Type of solver for (composite) coarse level operator (smoother | direct | amg)"); - std::string unstructured = "{}"; clp.setOption("unstructured", &unstructured, "List of ranks to be treated as unstructured, e.g. 
{0, 2, 5}"); - std::string coarseAmgXmlFile = ""; clp.setOption("coarseAmgXml", &coarseAmgXmlFile, "Read parameters for AMG as coarse level solve from this xml file."); - std::string coarseSmootherXMLFile = ""; clp.setOption("coarseSmootherXML", &coarseSmootherXMLFile, "File containing the parameters to use with the coarse level smoother."); - std::string equilibrate = "no" ; clp.setOption("equilibrate", &equilibrate, "equilibrate the system (no | diag | 1-norm)"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = ""; + clp.setOption("xml", &xmlFileName, "read parameters from an xml file"); + std::string yamlFileName = ""; + clp.setOption("yaml", &yamlFileName, "read parameters from a yaml file"); + std::string solverType = "region"; + clp.setOption("solverType", &solverType, "iterative solver to be used: (region | Richardson | CG)"); + std::string convergenceLog = "residual_norm.txt"; + clp.setOption("convergence-log", &convergenceLog, "file in which the convergence history of the linear solver is stored"); + int maxIts = 200; + clp.setOption("its", &maxIts, "maximum number of solver iterations"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + bool scaleResidualHist = true; + clp.setOption("scale", "noscale", &scaleResidualHist, "scaled Krylov residual history"); + bool serialRandom = false; + clp.setOption("use-serial-random", "no-use-serial-random", &serialRandom, "generate the random vector serially and then broadcast it"); + std::string cycleType = "V"; + clp.setOption("cycleType", &cycleType, "{Multigrid cycle type. 
Possible values: V, W."); + std::string smootherType = "Jacobi"; + clp.setOption("smootherType", &smootherType, "smoother to be used: (None | Jacobi | Gauss | Chebyshev)"); + int smootherIts = 2; + clp.setOption("smootherIts", &smootherIts, "number of smoother iterations"); + double smootherDamp = 0.67; + clp.setOption("smootherDamp", &smootherDamp, "damping parameter for the level smoother"); + double smootherChebyEigRatio = 2.0; + clp.setOption("smootherChebyEigRatio", &smootherChebyEigRatio, "eigenvalue ratio max/min used to approximate the smallest eigenvalue for Chebyshev relaxation"); + double smootherChebyBoostFactor = 1.1; + clp.setOption("smootherChebyBoostFactor", &smootherChebyBoostFactor, "boost factor for Chebyshev smoother"); + bool keepCoarseCoords = false; + clp.setOption("keep-coarse-coords", "no-keep-coarse-coords", &keepCoarseCoords, "keep coordinates on coarsest level of region hierarchy"); + bool coarseSolverRebalance = false; + clp.setOption("rebalance-coarse", "no-rebalance-coarse", &coarseSolverRebalance, "rebalance before AMG coarse grid solve"); + int rebalanceNumPartitions = -1; + clp.setOption("numPartitions", &rebalanceNumPartitions, "number of partitions for rebalancing the coarse grid AMG solve"); + std::string coarseSolverType = "direct"; + clp.setOption("coarseSolverType", &coarseSolverType, "Type of solver for (composite) coarse level operator (smoother | direct | amg)"); + std::string unstructured = "{}"; + clp.setOption("unstructured", &unstructured, "List of ranks to be treated as unstructured, e.g. 
{0, 2, 5}"); + std::string coarseAmgXmlFile = ""; + clp.setOption("coarseAmgXml", &coarseAmgXmlFile, "Read parameters for AMG as coarse level solve from this xml file."); + std::string coarseSmootherXMLFile = ""; + clp.setOption("coarseSmootherXML", &coarseSmootherXMLFile, "File containing the parameters to use with the coarse level smoother."); + std::string equilibrate = "no"; + clp.setOption("equilibrate", &equilibrate, "equilibrate the system (no | diag | 1-norm)"); #ifdef HAVE_MUELU_CUDA - bool profileSetup = false; clp.setOption("cuda-profile-setup", "no-cuda-profile-setup", &profileSetup, "enable CUDA profiling for setup"); - bool profileSolve = false; clp.setOption("cuda-profile-solve", "no-cuda-profile-solve", &profileSolve, "enable CUDA profiling for solve"); + bool profileSetup = false; + clp.setOption("cuda-profile-setup", "no-cuda-profile-setup", &profileSetup, "enable CUDA profiling for setup"); + bool profileSolve = false; + clp.setOption("cuda-profile-solve", "no-cuda-profile-solve", &profileSolve, "enable CUDA profiling for solve"); #endif - int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); - bool useStackedTimer = false; clp.setOption("stacked-timer","no-stacked-timer", &useStackedTimer, "use stacked timer"); - bool showTimerSummary = true; clp.setOption("show-timer-summary", "no-show-timer-summary", &showTimerSummary, "Switch on/off the timer summary at the end of the run."); + int cacheSize = 0; + clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); + bool useStackedTimer = false; + clp.setOption("stacked-timer", "no-stacked-timer", &useStackedTimer, "use stacked timer"); + bool showTimerSummary = true; + clp.setOption("show-timer-summary", "no-show-timer-summary", &showTimerSummary, "Switch on/off the timer summary at the end of the run."); clp.recogniseAllOptions(true); switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case 
Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } TEUCHOS_TEST_FOR_EXCEPTION(xmlFileName != "" && yamlFileName != "", std::runtime_error, @@ -202,7 +227,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream &out = *fancy; out.setOutputToRootOnly(0); ParameterList paramList; @@ -218,18 +243,20 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *comm); } - Array > smootherParams(1); //TODO: this is good, resized to numlevel + Array > smootherParams(1); // TODO: this is good, resized to numlevel smootherParams[0] = rcp(new Teuchos::ParameterList()); - smootherParams[0]->set("smoother: type", smootherType); - smootherParams[0]->set("smoother: sweeps", smootherIts); + smootherParams[0]->set("smoother: type", smootherType); + smootherParams[0]->set("smoother: sweeps", smootherIts); smootherParams[0]->set("smoother: damping", smootherDamp); smootherParams[0]->set("smoother: Chebyshev eigRatio", smootherChebyEigRatio); smootherParams[0]->set("smoother: Chebyshev boost factor", smootherChebyBoostFactor); - bool useUnstructured = false; + bool useUnstructured = false; Array unstructuredRanks = Teuchos::fromStringToArray(unstructured); - for(int idx = 0; idx < unstructuredRanks.size(); ++idx) { - if(unstructuredRanks[idx] == myRank) {useUnstructured = true;} + for (int idx = 0; idx < unstructuredRanks.size(); ++idx) { + if (unstructuredRanks[idx] == myRank) { 
+ useUnstructured = true; + } } // Retrieve matrix parameters (they may have been changed on the command line) @@ -242,27 +269,26 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar std::ostringstream galeriStream; #ifdef HAVE_MUELU_OPENMP std::string node_name = Node::name(); - if(!comm->getRank() && !node_name.compare("OpenMP/Wrapper")) - galeriStream<<"OpenMP Max Threads = "<getRank() && !node_name.compare("OpenMP/Wrapper")) + galeriStream << "OpenMP Max Threads = " << omp_get_max_threads() << std::endl; #endif - comm->barrier(); Teuchos::RCP stacked_timer; - if(useStackedTimer) + if (useStackedTimer) stacked_timer = rcp(new Teuchos::StackedTimer("MueLu_Driver")); Teuchos::TimeMonitor::setStackedTimer(stacked_timer); RCP globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: S - Global Time"))); RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1 - Build Composite Matrix"))); - RCP A; - RCP nodeMap, dofMap; + RCP nodeMap, dofMap; RCP X, B; - RCP nullspace; + RCP nullspace; RCP coordinates; - galeriStream << "========================================================\n" << xpetraParameters << galeriParameters; + galeriStream << "========================================================\n" + << xpetraParameters << galeriParameters; // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g., // d1 d2 d3 @@ -274,8 +300,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // size. For example, np=14 will give a 7-by-2 distribution. // If you don't want Galeri to do this, specify mx or my on the galeriList. 
std::string matrixType = galeriParameters.GetMatrixType(); - int numDimensions = 0; - int numDofsPerNode = 0; + int numDimensions = 0; + int numDofsPerNode = 0; Teuchos::Array procsPerDim(3); Teuchos::Array gNodesPerDim(3); Teuchos::Array lNodesPerDim(3); @@ -285,19 +311,19 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside if (matrixType == "Laplace1D") { numDimensions = 1; - nodeMap = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", nodeMap, galeriList); + nodeMap = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", nodeMap, galeriList); } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") { numDimensions = 2; - nodeMap = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", nodeMap, galeriList); + nodeMap = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", nodeMap, galeriList); } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") { numDimensions = 3; - nodeMap = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", nodeMap, galeriList); + nodeMap = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", nodeMap, galeriList); } // Expand map to do multiple DOF per node for block 
problems @@ -308,7 +334,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar } else { numDofsPerNode = 1; } - dofMap = Xpetra::MapFactory::Build(nodeMap, numDofsPerNode); + dofMap = Xpetra::MapFactory::Build(nodeMap, numDofsPerNode); galeriStream << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl << "Processor subdomains in y direction: " << galeriList.get("my") << std::endl @@ -317,15 +343,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") { // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet - galeriList.set("right boundary" , "Neumann"); + galeriList.set("right boundary", "Neumann"); galeriList.set("bottom boundary", "Neumann"); - galeriList.set("top boundary" , "Neumann"); - galeriList.set("front boundary" , "Neumann"); - galeriList.set("back boundary" , "Neumann"); + galeriList.set("top boundary", "Neumann"); + galeriList.set("front boundary", "Neumann"); + galeriList.set("back boundary", "Neumann"); } - RCP > Pr = - Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), dofMap, galeriList); + RCP > Pr = + Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), dofMap, galeriList); A = Pr->BuildMatrix(); A->SetFixedBlockSize(numDofsPerNode); @@ -335,40 +361,40 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar X = VectorFactory::Build(dofMap); B = VectorFactory::Build(dofMap); - if(serialRandom) { - //Build the seed on rank zero and broadcast it. + if (serialRandom) { + // Build the seed on rank zero and broadcast it. 
size_t localNumElements = 0; - if(comm->getRank() == 0) { + if (comm->getRank() == 0) { localNumElements = static_cast(dofMap->getGlobalNumElements()); } - RCP serialMap = MapFactory::Build(dofMap->lib(), - dofMap->getGlobalNumElements(), - localNumElements, - 0, - comm); + RCP serialMap = MapFactory::Build(dofMap->lib(), + dofMap->getGlobalNumElements(), + localNumElements, + 0, + comm); RCP Xserial = VectorFactory::Build(serialMap); Xserial->setSeed(251743369); - Xserial->randomize(true);// using xpetra's randomize. Otherwise random vector is only consistent for first 128 entries + Xserial->randomize(true); // using xpetra's randomize. Otherwise random vector is only consistent for first 128 entries RCP randomnessImporter = ImportFactory::Build(serialMap, dofMap); X->doImport(*Xserial, *randomnessImporter, Xpetra::INSERT); } else { // we set seed for reproducibility Utilities::SetRandomSeed(*comm); - X->randomize(true);// using xpetra's randomize. Otherwise random vector is only consistent for first 128 entries + X->randomize(true); // using xpetra's randomize. 
Otherwise random vector is only consistent for first 128 entries } A->apply(*X, *B, Teuchos::NO_TRANS, one, zero); Teuchos::Array norms(1); B->norm2(norms); - B->scale(one/norms[0]); + B->scale(one / norms[0]); galeriStream << "Galeri complete.\n========================================================" << std::endl; #ifdef MATLAB_COMPARE - Xpetra::IO::Write("Ax.mm",*B); - Xpetra::IO::Write("A.mm",*A); + Xpetra::IO::Write("Ax.mm", *B); + Xpetra::IO::Write("A.mm", *A); B->putScalar(zero); - Xpetra::IO::Write("rhs.mm",*B); - Xpetra::IO::Write("x.mm",*X); + Xpetra::IO::Write("rhs.mm", *B); + Xpetra::IO::Write("x.mm", *X); #endif out << galeriStream.str(); @@ -380,14 +406,14 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Set aggregation type for each region std::string aggregationRegionType; RCP interfaceParams = rcp(new ParameterList()); - if(useUnstructured) { + if (useUnstructured) { aggregationRegionType = "uncoupled"; } else { aggregationRegionType = "structured"; } // Loading geometric info from galeri - if(numDimensions == 1) { + if (numDimensions == 1) { gNodesPerDim[0] = galeriList.get("nx"); gNodesPerDim[1] = 1; gNodesPerDim[2] = 1; @@ -399,7 +425,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar procsPerDim[0] = galeriList.get("mx"); procsPerDim[1] = 1; procsPerDim[2] = 1; - } else if(numDimensions == 2) { + } else if (numDimensions == 2) { gNodesPerDim[0] = galeriList.get("nx"); gNodesPerDim[1] = galeriList.get("ny"); gNodesPerDim[2] = 1; @@ -411,7 +437,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar procsPerDim[0] = galeriList.get("mx"); procsPerDim[1] = galeriList.get("my"); procsPerDim[2] = 1; - } else if(numDimensions == 3) { + } else if (numDimensions == 3) { gNodesPerDim[0] = galeriList.get("nx"); gNodesPerDim[1] = galeriList.get("ny"); gNodesPerDim[2] = galeriList.get("nz"); @@ -446,17 +472,17 @@ int main_(Teuchos::CommandLineProcessor 
&clp, Xpetra::UnderlyingLib& lib, int ar // First we count how many nodes the region needs to send and receive // and allocate arrays accordingly Array boundaryConditions; - int maxRegPerGID = 0; - int numInterfaces = 0; + int maxRegPerGID = 0; + int numInterfaces = 0; LO numLocalRegionNodes = 0; - Array sendGIDs; + Array sendGIDs; Array sendPIDs; - Array rNodesPerDim(3); - Array compositeToRegionLIDs(nodeMap->getLocalNumElements()*numDofsPerNode); - Array quasiRegionGIDs; - Array quasiRegionCoordGIDs; - Array interfaceGIDs; - Array interfaceLIDsData; + Array rNodesPerDim(3); + Array compositeToRegionLIDs(nodeMap->getLocalNumElements() * numDofsPerNode); + Array quasiRegionGIDs; + Array quasiRegionCoordGIDs; + Array interfaceGIDs; + Array interfaceLIDsData; createRegionData(numDimensions, useUnstructured, numDofsPerNode, gNodesPerDim(), lNodesPerDim(), procsPerDim(), nodeMap, dofMap, @@ -466,7 +492,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar interfaceGIDs, interfaceLIDsData); // std::cout << "p=" << myRank << " | numSend=" << numSend << std::endl; - // << ", numReceive=" << numReceive << std::endl; + // << ", numReceive=" << numReceive << std::endl; // std::cout << "p=" << myRank << " | receiveGIDs: " << receiveGIDs << std::endl; // std::cout << "p=" << myRank << " | receivePIDs: " << receivePIDs << std::endl; // std::cout << "p=" << myRank << " | sendGIDs: " << sendGIDs << std::endl; @@ -475,8 +501,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Second we actually fill the send and receive arrays with appropriate data // which will allow us to compute the region and composite maps. 
// Now we can construct a list of GIDs that corresponds to rowMap - Array interfacesDimensions, interfacesLIDs; - if(useUnstructured) { + Array interfacesDimensions, interfacesLIDs; + if (useUnstructured) { findInterface(numDimensions, rNodesPerDim, boundaryConditions, interfacesDimensions, interfacesLIDs); @@ -488,8 +514,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // << "p=" << myRank << " | interfacesLIDs: " << interfacesLIDs << std::endl; } - interfaceParams->set >("interfaces: nodes per dimensions", interfacesDimensions); // nodesPerDimensions); - interfaceParams->set >("interfaces: interface nodes", interfacesLIDs); // interfaceLIDs); + interfaceParams->set >("interfaces: nodes per dimensions", interfacesDimensions); // nodesPerDimensions); + interfaceParams->set >("interfaces: interface nodes", interfacesLIDs); // interfaceLIDs); // std::cout << "p=" << myRank << " | compositeToRegionLIDs: " << compositeToRegionLIDs << std::endl; // std::cout << "p=" << myRank << " | quasiRegionGIDs: " << quasiRegionGIDs << std::endl; @@ -509,34 +535,34 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar RCP tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3.1 - Build Region Maps"))); - Teuchos::RCP > rowMap, colMap; - Teuchos::RCP > revisedRowMap, revisedColMap; - rowMap = Xpetra::MapFactory::Build(dofMap->lib(), + Teuchos::RCP > rowMap, colMap; + Teuchos::RCP > revisedRowMap, revisedColMap; + rowMap = Xpetra::MapFactory::Build(dofMap->lib(), Teuchos::OrdinalTraits::invalid(), quasiRegionGIDs(), dofMap->getIndexBase(), dofMap->getComm()); - colMap = rowMap; - revisedRowMap = Xpetra::MapFactory::Build(dofMap->lib(), - Teuchos::OrdinalTraits::invalid(), - numLocalRegionNodes*numDofsPerNode, - dofMap->getIndexBase(), - dofMap->getComm()); + colMap = rowMap; + revisedRowMap = Xpetra::MapFactory::Build(dofMap->lib(), + Teuchos::OrdinalTraits::invalid(), + numLocalRegionNodes * 
numDofsPerNode, + dofMap->getIndexBase(), + dofMap->getComm()); revisedColMap = revisedRowMap; // Build objects needed to construct the region coordinates - Teuchos::RCP > quasiRegCoordMap = Xpetra::MapFactory:: - Build(nodeMap->lib(), - Teuchos::OrdinalTraits::invalid(), - quasiRegionCoordGIDs(), - nodeMap->getIndexBase(), - nodeMap->getComm()); - Teuchos::RCP > regCoordMap = Xpetra::MapFactory:: - Build(nodeMap->lib(), - Teuchos::OrdinalTraits::invalid(), - numLocalRegionNodes, - nodeMap->getIndexBase(), - nodeMap->getComm()); + Teuchos::RCP > quasiRegCoordMap = Xpetra::MapFactory:: + Build(nodeMap->lib(), + Teuchos::OrdinalTraits::invalid(), + quasiRegionCoordGIDs(), + nodeMap->getIndexBase(), + nodeMap->getComm()); + Teuchos::RCP > regCoordMap = Xpetra::MapFactory:: + Build(nodeMap->lib(), + Teuchos::OrdinalTraits::invalid(), + numLocalRegionNodes, + nodeMap->getIndexBase(), + nodeMap->getComm()); comm->barrier(); tmLocal = Teuchos::null; @@ -545,15 +571,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Setup importers RCP rowImport; RCP colImport; - rowImport = ImportFactory::Build(dofMap, rowMap); - colImport = ImportFactory::Build(dofMap, colMap); + rowImport = ImportFactory::Build(dofMap, rowMap); + colImport = ImportFactory::Build(dofMap, colMap); RCP coordImporter = ImportFactory::Build(nodeMap, quasiRegCoordMap); comm->barrier(); tmLocal = Teuchos::null; tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3.3 - Import ghost GIDs"))); - Array interfaceCompositeGIDs, interfaceRegionGIDs; + Array interfaceCompositeGIDs, interfaceRegionGIDs; ExtractListOfInterfaceRegionGIDs(revisedRowMap, interfaceLIDsData, interfaceRegionGIDs); RCP > regionsPerGIDWithGhosts; @@ -589,7 +615,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar rowImport, quasiRegionMats, regionMats); // If we don't need the composite operator on the fine level anymore, free it! 
- if(solverType == "region") A = Teuchos::null; + if (solverType == "region") A = Teuchos::null; comm->barrier(); tmLocal = Teuchos::null; @@ -614,12 +640,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar regionNullspace->replaceMap(revisedRowMap); // create region coordinates vector - regionCoordinates = Xpetra::MultiVectorFactory::Build(quasiRegCoordMap, + regionCoordinates = Xpetra::MultiVectorFactory::Build(quasiRegCoordMap, coordinates->getNumVectors()); regionCoordinates->doImport(*coordinates, *coordImporter, Xpetra::INSERT); regionCoordinates->replaceMap(regCoordMap); - using Tpetra_CrsMatrix = Tpetra::CrsMatrix; + using Tpetra_CrsMatrix = Tpetra::CrsMatrix; using Tpetra_MultiVector = Tpetra::MultiVector; /* Stuff for multi-level algorithm @@ -639,46 +665,44 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar coarseSolverData->set("smoother xml file", coarseSmootherXMLFile); RCP hierarchyData = rcp(new ParameterList()); - // Create MueLu Hierarchy Initially... 
// Read MueLu parameter list form xml file RCP mueluParams = Teuchos::rcp(new ParameterList()); Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, mueluParams.ptr(), *dofMap->getComm()); // Insert region-specific data into parameter list - const std::string userName = "user data"; - Teuchos::ParameterList& userParamList = mueluParams->sublist(userName); - userParamList.set ("int numDimensions", numDimensions); - userParamList.set > ("Array lNodesPerDim", regionNodesPerDim); + const std::string userName = "user data"; + Teuchos::ParameterList &userParamList = mueluParams->sublist(userName); + userParamList.set("int numDimensions", numDimensions); + userParamList.set >("Array lNodesPerDim", regionNodesPerDim); userParamList.set("string aggregationRegionType", aggregationRegionType); - userParamList.set > ("Array nodeOnInterface", interfaceParams->get >("interfaces: interface nodes")); - userParamList.set > ("Array interfacesDimensions", interfaceParams->get >("interfaces: nodes per dimensions")); - if(Teuchos::nonnull(regionCoordinates)) { + userParamList.set >("Array nodeOnInterface", interfaceParams->get >("interfaces: interface nodes")); + userParamList.set >("Array interfacesDimensions", interfaceParams->get >("interfaces: nodes per dimensions")); + if (Teuchos::nonnull(regionCoordinates)) { userParamList.set("Coordinates", regionCoordinates); } - if(Teuchos::nonnull(regionNullspace)) { + if (Teuchos::nonnull(regionNullspace)) { userParamList.set("Nullspace", regionNullspace); } tmLocal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("CreateXpetraPreconditioner: Hierarchy"))); // Create multigrid hierarchy part 1 - RCP regHierarchy = MueLu::CreateXpetraPreconditioner(regionMats, *mueluParams); + RCP regHierarchy = MueLu::CreateXpetraPreconditioner(regionMats, *mueluParams); { RCP level = regHierarchy->GetLevel(0); - level->Set > >("rowImport",rowImport); - level->Set > ("compositeToRegionLIDs", compositeToRegionLIDs() ); + level->Set > >("rowImport", 
rowImport); + level->Set >("compositeToRegionLIDs", compositeToRegionLIDs()); level->Set > >("interfaceGIDs", interfaceGIDsMV); level->Set > >("regionsPerGIDWithGhosts", regionsPerGIDWithGhosts); level->Set >("regionMatVecLIDs", regionMatVecLIDs); level->Set > >("regionInterfaceImporter", regionInterfaceImporter); - level->print( std::cout, MueLu::Extreme ); + level->print(std::cout, MueLu::Extreme); } tmLocal = Teuchos::null; - // Create multigrid hierarchy part 2 createRegionHierarchy(numDimensions, regionNodesPerDim, @@ -693,8 +717,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar hierarchyData->print(); - - comm->barrier(); tm = Teuchos::null; @@ -702,10 +724,10 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar const int numLevels = regHierarchy->GetNumLevels(); // Set data for fast MatVec - for(LO levelIdx = 0; levelIdx < numLevels; ++levelIdx) { - RCP level = regHierarchy->GetLevel(levelIdx); + for (LO levelIdx = 0; levelIdx < numLevels; ++levelIdx) { + RCP level = regHierarchy->GetLevel(levelIdx); RCP > regionInterfaceImport = level->Get > >("regionInterfaceImporter"); - Teuchos::ArrayRCP regionMatVecLIDs1 = level->Get >("regionMatVecLIDs"); + Teuchos::ArrayRCP regionMatVecLIDs1 = level->Get >("regionMatVecLIDs"); smootherParams[levelIdx]->set("Fast MatVec: interface LIDs", regionMatVecLIDs1); smootherParams[levelIdx]->set("Fast MatVec: interface importer", @@ -722,68 +744,67 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 5 - Solve with V-cycle"))); #ifdef DUMP_LOCALX_AND_A - FILE *fp; - char str[80]; - sprintf(str,"theMatrix.%d",myRank); - fp = fopen(str,"w"); - fprintf(fp, "%%%%MatrixMarket matrix coordinate real general\n"); - LO numNzs = 0; - for (size_t kkk = 0; kkk < regionMats->getLocalNumRows(); kkk++) { - ArrayView AAcols; - ArrayView AAvals; - regionMats->getLocalRowView(kkk, AAcols, 
AAvals); - const int *Acols = AAcols.getRawPtr(); - const SC *Avals = AAvals.getRawPtr(); - numNzs += AAvals.size(); - } - fprintf(fp, "%d %d %d\n",regionMats->getLocalNumRows(),regionMats->getLocalNumRows(),numNzs); - - for (size_t kkk = 0; kkk < regionMats->getLocalNumRows(); kkk++) { - ArrayView AAcols; - ArrayView AAvals; - regionMats->getLocalRowView(kkk, AAcols, AAvals); - const int *Acols = AAcols.getRawPtr(); - const SC *Avals = AAvals.getRawPtr(); - LO RowLeng = AAvals.size(); - for (LO kk = 0; kk < RowLeng; kk++) { - fprintf(fp, "%d %d %22.16e\n",kkk+1,Acols[kk]+1,Avals[kk]); - } + FILE *fp; + char str[80]; + sprintf(str, "theMatrix.%d", myRank); + fp = fopen(str, "w"); + fprintf(fp, "%%%%MatrixMarket matrix coordinate real general\n"); + LO numNzs = 0; + for (size_t kkk = 0; kkk < regionMats->getLocalNumRows(); kkk++) { + ArrayView AAcols; + ArrayView AAvals; + regionMats->getLocalRowView(kkk, AAcols, AAvals); + const int *Acols = AAcols.getRawPtr(); + const SC *Avals = AAvals.getRawPtr(); + numNzs += AAvals.size(); + } + fprintf(fp, "%d %d %d\n", regionMats->getLocalNumRows(), regionMats->getLocalNumRows(), numNzs); + + for (size_t kkk = 0; kkk < regionMats->getLocalNumRows(); kkk++) { + ArrayView AAcols; + ArrayView AAvals; + regionMats->getLocalRowView(kkk, AAcols, AAvals); + const int *Acols = AAcols.getRawPtr(); + const SC *Avals = AAvals.getRawPtr(); + LO RowLeng = AAvals.size(); + for (LO kk = 0; kk < RowLeng; kk++) { + fprintf(fp, "%d %d %22.16e\n", kkk + 1, Acols[kk] + 1, Avals[kk]); } - fclose(fp); - // sprintf(str,"theX.%d",myRank); - // fp = fopen(str,"w"); - // ArrayRCP lX= regX->getDataNonConst(0); - // for (size_t kkk = 0; kkk < regionMats->getLocalNumRows(); kkk++) fprintf(fp, "%22.16e\n",lX[kkk]); - // fclose(fp); + } + fclose(fp); + // sprintf(str,"theX.%d",myRank); + // fp = fopen(str,"w"); + // ArrayRCP lX= regX->getDataNonConst(0); + // for (size_t kkk = 0; kkk < regionMats->getLocalNumRows(); kkk++) fprintf(fp, "%22.16e\n",lX[kkk]); 
+ // fclose(fp); #endif - if(solverType == "region") { + if (solverType == "region") { solveRegionProblemRichardson(tol, scaleResidualHist, maxIts, - cycleType, convergenceLog, - coarseSolverData, smootherParams, hierarchyData, - regHierarchy, X, B); - } else if(solverType == "Richardson") { + cycleType, convergenceLog, + coarseSolverData, smootherParams, hierarchyData, + regHierarchy, X, B); + } else if (solverType == "Richardson") { solveCompositeProblemRichardson(tol, scaleResidualHist, maxIts, - cycleType, convergenceLog, - coarseSolverData, smootherParams, hierarchyData, - regHierarchy, A, X, B); - } else if(solverType == "CG") { + cycleType, convergenceLog, + coarseSolverData, smootherParams, hierarchyData, + regHierarchy, A, X, B); + } else if (solverType == "CG") { solveCompositeProblemPCG(tol, scaleResidualHist, maxIts, - cycleType, convergenceLog, - coarseSolverData, smootherParams, hierarchyData, - regHierarchy, A, X, B); + cycleType, convergenceLog, + coarseSolverData, smootherParams, hierarchyData, + regHierarchy, A, X, B); } else { - throw std::runtime_error("Unknown solverType: "+solverType); + throw std::runtime_error("Unknown solverType: " + solverType); } comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; - if (showTimerSummary) - { + if (showTimerSummary) { RCP reportParams = rcp(new ParameterList); - const std::string filter = ""; + const std::string filter = ""; if (useStackedTimer) { Teuchos::StackedTimer::OutputOptions options; options.output_fraction = options.output_histogram = options.output_minmax = true; @@ -805,5 +826,5 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } diff --git a/packages/muelu/research/semicoarsening/driver.cpp b/packages/muelu/research/semicoarsening/driver.cpp index b6c5687d748d..1da5a2793418 
100644 --- a/packages/muelu/research/semicoarsening/driver.cpp +++ b/packages/muelu/research/semicoarsening/driver.cpp @@ -64,10 +64,10 @@ this directory. // Define default data types //#include -//typedef double Scalar; -//typedef int LocalOrdinal; -//typedef int GlobalOrdinal; -//typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; +// typedef double Scalar; +// typedef int LocalOrdinal; +// typedef int GlobalOrdinal; +// typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; #include #include @@ -95,7 +95,7 @@ this directory. #include #endif -template +template int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { // Most MueLu and Xpetra classes are templated on some or all of the // following template types: Scalar, LocalOrdinal, GlobalOrdinal, @@ -122,9 +122,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // you don't have to write the namespace along with the class or // object name. This is especially helpful with commonly used things // like std::endl or Teuchos::RCP. + using Teuchos::ParameterList; using Teuchos::RCP; using Teuchos::rcp; - using Teuchos::ParameterList; // Start up MPI, if using MPI. Trilinos doesn't have to be built // with MPI; it's called a "serial" build if you build without MPI. @@ -158,7 +158,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Make an output stream (for verbose output) that only prints on // Process 0 of the communicator. Teuchos::oblackholestream blackHole; - std::ostream& out = (myRank == 0) ? std::cout : blackHole; + std::ostream &out = (myRank == 0) ? std::cout : blackHole; // Teuchos provides an interface to get arguments from the command // line. The first argument indicates that we don't want any @@ -171,36 +171,43 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // - Xpetra parameters: Allow switching between Epetra and Tpetra. 
// - Driver parameters: These are specified below. GO nx = 6, ny = 5, nz = 163; - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace3D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = "driver1.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'scalingTest.xml']"); - - std::string solveType = "cg"; clp.setOption("solver", &solveType, "solve type: (cg | gmres)"); - double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); - int maxIts = 200; clp.setOption("its", &maxIts, "maximum number of solver iterations"); - - std::string mapFile; clp.setOption("map", &mapFile, "map data file"); - std::string matrixFile; clp.setOption("matrix", &matrixFile, "matrix data file"); - std::string coordFile; clp.setOption("coords", &coordFile, "coordinates data file"); + Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace3D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = "driver1.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file [default = 'scalingTest.xml']"); + + std::string solveType = "cg"; + clp.setOption("solver", &solveType, "solve type: (cg | gmres)"); + double tol = 1e-12; + clp.setOption("tol", &tol, "solver convergence tolerance"); + int maxIts = 200; + clp.setOption("its", &maxIts, "maximum number of solver iterations"); + + std::string mapFile; + clp.setOption("map", &mapFile, "map data file"); + std::string matrixFile; + clp.setOption("matrix", &matrixFile, "matrix data file"); + std::string coordFile; + clp.setOption("coords", &coordFile, "coordinates data file"); // Command line processor parsing stage switch (clp.parse(argc, argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: 
return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // Retrieve matrix parameters (they may have been changed on the // command line). ParameterList galeriList = galeriParameters.GetParameterList(); -//galeriList.set("mx",6); galeriList.set("my",5); galeriList.set("mz",1); + // galeriList.set("mx",6); galeriList.set("my",5); galeriList.set("mz",1); // Construct the problem data. Typically, we construct the matrix, // coordinates, and nullspace, though only the matrix is mandatory. - RCP A; - RCP map; + RCP A; + RCP map; RCP coordinates; if (matrixFile.empty()) { // Use Galeri to construct the data @@ -208,7 +215,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Galeri provides several pre-defined problem types, including // some stencil based ones (like Laplace matrices in 2D and 3D), // and some others (like elasticity in 2D and 3D) - out << "========================================================\n" << xpetraParameters << galeriParameters; + out << "========================================================\n" + << xpetraParameters << galeriParameters; // Galeri will attempt to create a square-as-possible distribution of // subdomains di, e.g., @@ -229,16 +237,16 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Create map (Map) and coordinates (MultiVector). Xpetra's Map // and MultiVector imitate Tpetra's interface. 
if (matrixType == "Laplace1D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", map, galeriList); } else if (matrixType == "Laplace2D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", map, galeriList); } else if (matrixType == "Laplace3D") { - map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); - coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); + map = Galeri::Xpetra::CreateMap(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); + coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", map, galeriList); } out << "Processor subdomains in x direction: " << galeriList.get("mx") << std::endl @@ -248,8 +256,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Construct the matrix based on the problem name and provided // parameters. - RCP > Pr = - Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), map, galeriList); + RCP > Pr = + Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), map, galeriList); A = Pr->BuildMatrix(); } else { @@ -260,12 +268,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // (though you may avoid that in a serial run), a matrix (in a // MatrixMarket format), and a file with coordinates. 
if (!mapFile.empty()) - map = Xpetra::IO::ReadMap(mapFile, xpetraParameters.GetLib(), comm); + map = Xpetra::IO::ReadMap(mapFile, xpetraParameters.GetLib(), comm); - A = Xpetra::IO::Read(matrixFile, map); + A = Xpetra::IO::Read(matrixFile, map); if (!coordFile.empty()) - coordinates = Xpetra::IO::ReadMultiVector(coordFile, map); + coordinates = Xpetra::IO::ReadMultiVector(coordFile, map); } // For scalar equations, we assume that the constant vector is a @@ -276,7 +284,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg nullspace->putScalar(one); out << "Galeri complete.\n========================================================" << std::endl; -std::cout << coordinates << std::endl; + std::cout << coordinates << std::endl; // Hierarchy is a key concept in MueLu. Once set up, it has all the // data required to apply the preconditioner. MueLu provides @@ -302,7 +310,7 @@ std::cout << coordinates << std::endl; // subsequent coarser levels have increasing indices. This way, if // we have L levels, level 0 corresponds to the finest level, and // level L-1 is the coarsest level. - RCP L = H->GetLevel(0); + RCP L = H->GetLevel(0); // Setting the finest level matrix is mandatory in MueLu. However, // setting the nullspace and coordinates is optional. If nullspace @@ -310,23 +318,23 @@ std::cout << coordinates << std::endl; // vector. Coordinates are needed only in two cases: if one uses // distance Laplacian filtering, or one uses repartitioning (which // is based on geometric partitioning). 
- L->Set("A", A); + L->Set("A", A); L->Set("Nullspace", nullspace); -/* - if (!coordinates.is_null()) - L->Set("Coordinates", coordinates); -*/ + /* + if (!coordinates.is_null()) + L->Set("Coordinates", coordinates); + */ -Teuchos::ArrayRCP SemiCoarsenInfo = Teuchos::arcp(3); -SemiCoarsenInfo[NUM_ZPTS] = nz; -SemiCoarsenInfo[ORIENTATION] = VERTICAL; -//L->Set("SemiCoarsenInfo",SemiCoarsenInfo, MueLu::NoFactory::get() ); -L->Set("SemiCoarsenInfo",SemiCoarsenInfo); + Teuchos::ArrayRCP SemiCoarsenInfo = Teuchos::arcp(3); + SemiCoarsenInfo[NUM_ZPTS] = nz; + SemiCoarsenInfo[ORIENTATION] = VERTICAL; + // L->Set("SemiCoarsenInfo",SemiCoarsenInfo, MueLu::NoFactory::get() ); + L->Set("SemiCoarsenInfo", SemiCoarsenInfo); -printf("before level print\n"); -L->print(std::cout,Teuchos::VERB_EXTREME); -printf("after level print\n"); + printf("before level print\n"); + L->print(std::cout, Teuchos::VERB_EXTREME); + printf("after level print\n"); // Now that we have set the mandatory data, we can construct the // preconditioner. The result of this is a fully constructed @@ -357,11 +365,11 @@ printf("after level print\n"); Teuchos::Array norms(1); B->norm2(norms); - B->scale(one/norms[0]); + B->scale(one / norms[0]); X->putScalar(zero); } #ifdef HAVE_MUELU_BELOS - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; // Internally, Belos uses its own types. We use special wrappers to @@ -369,18 +377,18 @@ printf("after level print\n"); // one of many different ways one could choose to wrap MueLu and // Xpetra objects in order to get them to work with Belos. RCP belosOp = rcp(new Belos::XpetraOp(A)); - RCP belosPrec = rcp(new Belos::MueLuOp (H)); + RCP belosPrec = rcp(new Belos::MueLuOp(H)); // Construct a Belos LinearProblem object. This is a complete // problem formulation. All the data necessary to solve the system // are now inside that problem. 
- RCP< Belos::LinearProblem > belosProblem = - rcp (new Belos::LinearProblem (belosOp, X, B)); - belosProblem->setRightPrec (belosPrec); + RCP > belosProblem = + rcp(new Belos::LinearProblem(belosOp, X, B)); + belosProblem->setRightPrec(belosPrec); // Prepare the linear problem to solve the linear system that was // already passed in. - bool set = belosProblem->setProblem (); + bool set = belosProblem->setProblem(); if (!set) { out << "\nERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; return EXIT_FAILURE; @@ -391,11 +399,11 @@ printf("after level print\n"); // the number of iterations or its verbosity. For a full list of // Belos parameters, please consult the Belos package documentation. ParameterList belosList; - belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested - belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList.set("Output Frequency", 1); - belosList.set("Output Style", Belos::Brief); + belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested + belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", Belos::Brief); // Create a Belos iterative linear solver // @@ -406,9 +414,9 @@ printf("after level print\n"); // interface in Belos. 
RCP > solver; if (solveType == "cg") - solver = rcp(new Belos::PseudoBlockCGSolMgr(belosProblem, rcp(&belosList, false))); + solver = rcp(new Belos::PseudoBlockCGSolMgr(belosProblem, rcp(&belosList, false))); else if (solveType == "gmres") - solver = rcp(new Belos::BlockGmresSolMgr (belosProblem, rcp(&belosList, false))); + solver = rcp(new Belos::BlockGmresSolMgr(belosProblem, rcp(&belosList, false))); // Finally, perform the solve. We wrap it in a try-catch block, // since Belos indicates erros by throwing exceptions. @@ -419,17 +427,21 @@ printf("after level print\n"); // Get the number of iterations for this solve. out << "Converged in " << solver->getNumIters() << " iterations" << std::endl; - } catch(...) { - out << std::endl << "ERROR: Belos threw an error! " << std::endl; + } catch (...) { + out << std::endl + << "ERROR: Belos threw an error! " << std::endl; } // Check convergence if (ret != Belos::Converged) - out << std::endl << "ERROR: Belos did not converge! " << std::endl; + out << std::endl + << "ERROR: Belos did not converge! " << std::endl; else - out << std::endl << "SUCCESS: Belos converged!" << std::endl; + out << std::endl + << "SUCCESS: Belos converged!" << std::endl; #else - out << std::endl << "MueLu has been compiled without Belos support!" << std::endl; + out << std::endl + << "MueLu has been compiled without Belos support!" << std::endl; #endif // GlobalMPISession calls MPI_Finalize() in its destructor, if // appropriate. You don't have to do anything here! 
Just return @@ -437,14 +449,10 @@ printf("after level print\n"); return 0; } - //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); + return Automatic_Test_ETI(argc, argv); } - - - diff --git a/packages/muelu/research/tawiesn/aria/Driver.cpp b/packages/muelu/research/tawiesn/aria/Driver.cpp index 10a0f563808c..05cdc1077191 100644 --- a/packages/muelu/research/tawiesn/aria/Driver.cpp +++ b/packages/muelu/research/tawiesn/aria/Driver.cpp @@ -50,7 +50,7 @@ #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include #include @@ -60,94 +60,90 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp #endif - template - class ExportVTK : public MueLu::VisualizationHelpers { - public: - ExportVTK() {}; - - public: - - void writeFile(std::ofstream& fout, Teuchos::RCP >& coordinates, Teuchos::RCP >& sol) - { - using namespace std; - typedef MueLu::VisualizationHelpers VH; - Teuchos::RCP > nodeMap = coordinates->getMap(); - std::vector vertices; - std::vector geomSize; - LocalOrdinal numFineNodes = Teuchos::as(coordinates->getLocalLength()); - - vertices.reserve(numFineNodes); - geomSize.reserve(numFineNodes); - for(LocalOrdinal i = 0; i < numFineNodes; i++) - { - vertices.push_back(i); - geomSize.push_back(1); - } - - Teuchos::ArrayRCP xCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(0)); - Teuchos::ArrayRCP yCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(1)); - Teuchos::ArrayRCP zCoords = Teuchos::null; - if(coordinates->getNumVectors() == 3) { - zCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(2)); - } - Teuchos::ArrayRCP solData = 
Teuchos::arcp_reinterpret_cast(sol->getData(0)); - - std::vector uniqueFine = this->makeUnique(vertices); - this->writeFileVTKOpening(fout, uniqueFine, geomSize); - this->writeFileVTKNodes(fout, uniqueFine, nodeMap); - - std::string indent = " "; - fout << " " << std::endl; - fout << indent; - int myRank = coordinates->getMap()->getComm()->getRank(); - for(int i = 0; i < int(uniqueFine.size()); i++) - { - fout << myRank << " "; - if(i % 20 == 19) - fout << std::endl << indent; - } - fout << std::endl; - fout << " " << std::endl; - // solution vector - fout << " " << std::endl; - fout << indent; - for(int i = 0; i < int(uniqueFine.size()); i++) - { - size_t numVec = coordinates->getNumVectors(); - for(int d=0; d" << std::endl; - fout << " " << std::endl; // that is annoying - - this->writeFileVTKCoordinates(fout, uniqueFine, xCoords, yCoords, zCoords, coordinates->getNumVectors()); - this->writeFileVTKCells(fout, uniqueFine, vertices, geomSize); - this->writeFileVTKClosing(fout); - fout.close(); - - }; +template +class ExportVTK : public MueLu::VisualizationHelpers { + public: + ExportVTK(){}; + + public: + void writeFile(std::ofstream& fout, Teuchos::RCP >& coordinates, Teuchos::RCP >& sol) { + using namespace std; + typedef MueLu::VisualizationHelpers VH; + Teuchos::RCP > nodeMap = coordinates->getMap(); + std::vector vertices; + std::vector geomSize; + LocalOrdinal numFineNodes = Teuchos::as(coordinates->getLocalLength()); + + vertices.reserve(numFineNodes); + geomSize.reserve(numFineNodes); + for (LocalOrdinal i = 0; i < numFineNodes; i++) { + vertices.push_back(i); + geomSize.push_back(1); + } + Teuchos::ArrayRCP xCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(0)); + Teuchos::ArrayRCP yCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(1)); + Teuchos::ArrayRCP zCoords = Teuchos::null; + if (coordinates->getNumVectors() == 3) { + zCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(2)); + } + Teuchos::ArrayRCP solData = 
Teuchos::arcp_reinterpret_cast(sol->getData(0)); + + std::vector uniqueFine = this->makeUnique(vertices); + this->writeFileVTKOpening(fout, uniqueFine, geomSize); + this->writeFileVTKNodes(fout, uniqueFine, nodeMap); + + std::string indent = " "; + fout << " " << std::endl; + fout << indent; + int myRank = coordinates->getMap()->getComm()->getRank(); + for (int i = 0; i < int(uniqueFine.size()); i++) { + fout << myRank << " "; + if (i % 20 == 19) + fout << std::endl + << indent; + } + fout << std::endl; + fout << " " << std::endl; + // solution vector + fout << " " << std::endl; + fout << indent; + for (int i = 0; i < int(uniqueFine.size()); i++) { + size_t numVec = coordinates->getNumVectors(); + for (int d = 0; d < numVec; d++) + fout << solData[i * numVec + d] << " "; + if (i % 3 == 0) + fout << std::endl + << indent; + } + fout << std::endl; + fout << " " << std::endl; + fout << " " << std::endl; // that is annoying + + this->writeFileVTKCoordinates(fout, uniqueFine, xCoords, yCoords, zCoords, coordinates->getNumVectors()); + this->writeFileVTKCells(fout, uniqueFine, vertices, geomSize); + this->writeFileVTKClosing(fout); + fout.close(); }; +}; -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib lib, int argc, char* argv[]) { #include - using Teuchos::RCP; using Teuchos::rcp; + using Teuchos::RCP; + using Teuchos::rcp; using Teuchos::TimeMonitor; bool success = false; bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // ========================================================================= // Convenient definitions @@ -156,24 +152,25 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Instead of checking each time for rank, create a rank 0 stream RCP fancy = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& out = *fancy; + Teuchos::FancyOStream& out = *fancy; out.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= - //GO nx = 100, ny = 100, nz = 100; - //Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + // GO nx = 100, ny = 100, nz = 100; + // Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - std::string xmlFileName = "driver.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'scalingTest.xml'"); + std::string xmlFileName = "driver.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default 'scalingTest.xml'"); - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // ========================================================================= @@ -181,200 +178,197 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // ========================================================================= RCP globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MatrixRead: S - Global Time"))), tm; - GlobalOrdinal nGlobalNodes = 0; - if(comm->getRank() == 0) { + if (comm->getRank() == 0) { std::ifstream data_file; data_file.open("xxx.mm"); - TEUCHOS_TEST_FOR_EXCEPTION(data_file.good() == false, MueLu::Exceptions::RuntimeError,"Problem opening file xxx.mm"); + TEUCHOS_TEST_FOR_EXCEPTION(data_file.good() == false, MueLu::Exceptions::RuntimeError, "Problem opening file xxx.mm"); std::string line; - getline(data_file,line); + getline(data_file, line); data_file >> nGlobalNodes; data_file.close(); } - Teuchos::broadcast(*comm,0,1,&nGlobalNodes); + Teuchos::broadcast(*comm, 0, 1, &nGlobalNodes); out << "Found " << nGlobalNodes << " nodes " << std::endl; // each processor reads in its dofPresent array LocalOrdinal nNodes = 0; LocalOrdinal nDofs = 0; - int maxDofPerNode = -1; + int maxDofPerNode = -1; Teuchos::ArrayRCP dofPresent; { - std::stringstream ss; ss << comm->getSize() << "dofPresent" << comm->getRank(); + std::stringstream ss; + ss << comm->getSize() << "dofPresent" << comm->getRank(); - try{ + try { std::ifstream data_file(ss.str()); // 
read in first line containing number of local nodes and maxDofPerNode data_file >> nNodes >> maxDofPerNode; - dofPresent = Teuchos::ArrayRCP(nNodes * maxDofPerNode,0); + dofPresent = Teuchos::ArrayRCP(nNodes * maxDofPerNode, 0); // loop over all local nodes - for(LocalOrdinal i = 0; i < nNodes; i++) { - for(int j=0; j> tmp; - if(tmp == 1) { - dofPresent[i*maxDofPerNode+j] = 1; nDofs++; + if (tmp == 1) { + dofPresent[i * maxDofPerNode + j] = 1; + nDofs++; } else { - dofPresent[i*maxDofPerNode+j] = 0; + dofPresent[i * maxDofPerNode + j] = 0; } } } + } catch (const std::exception& e) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Problem opening/reading file " << ss.str()); } - catch(const std::exception& e) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError,"Problem opening/reading file " << ss.str()); - } - } out << "PROC " << comm->getRank() << "/" << comm->getSize() << " " << nNodes << " " << maxDofPerNode << std::endl; - /*for(LocalOrdinal i = 0; i < nNodes; i++) { for(int j=0; j dofGlobals; Teuchos::Array nodalGlobals; // nodal GIDs for laplacian (with holes) { - std::stringstream ss; ss << comm->getSize() << "proc" << comm->getRank(); - try{ + std::stringstream ss; + ss << comm->getSize() << "proc" << comm->getRank(); + try { std::ifstream data_file(ss.str()); // read in first line containing number of local nodes and maxDofPerNode LocalOrdinal tmpDofs = 0; data_file >> tmpDofs; - TEUCHOS_TEST_FOR_EXCEPTION(tmpDofs != nDofs, MueLu::Exceptions::RuntimeError,"Number of gid entries in map file is " << tmpDofs << " but should be " << nDofs); + TEUCHOS_TEST_FOR_EXCEPTION(tmpDofs != nDofs, MueLu::Exceptions::RuntimeError, "Number of gid entries in map file is " << tmpDofs << " but should be " << nDofs); dofGlobals = Teuchos::Array(nDofs); // loop over all local nodes - for(GlobalOrdinal i = 0; i < nDofs; i++) { + for (GlobalOrdinal i = 0; i < nDofs; i++) { int data; data_file >> data; dofGlobals[i] = Teuchos::as(data); } - } 
- catch(const std::exception& e) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError,"Problem opening/reading file " << ss.str()); + } catch (const std::exception& e) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Problem opening/reading file " << ss.str()); } nodalGlobals = Teuchos::Array(nNodes); GlobalOrdinal count = nDofs - 1; - for(GlobalOrdinal i = nNodes - 1; i>=0; i--) { - for(int j = maxDofPerNode-1; j >=0; j--) { - if(dofPresent[i*maxDofPerNode+j] == 1) + for (GlobalOrdinal i = nNodes - 1; i >= 0; i--) { + for (int j = maxDofPerNode - 1; j >= 0; j--) { + if (dofPresent[i * maxDofPerNode + j] == 1) nodalGlobals[i] = dofGlobals[count--]; } } } - Teuchos::RCP LapMap = MapFactory::Build(lib,nGlobalNodes,nodalGlobals(),0,comm); + Teuchos::RCP LapMap = MapFactory::Build(lib, nGlobalNodes, nodalGlobals(), 0, comm); - //LapMap->describe(out, Teuchos::VERB_EXTREME); + // LapMap->describe(out, Teuchos::VERB_EXTREME); // { - std::stringstream ss; ss << comm->getSize() << "ProcLinear" << comm->getRank(); - try{ + std::stringstream ss; + ss << comm->getSize() << "ProcLinear" << comm->getRank(); + try { std::ifstream data_file(ss.str()); // loop over all local nodes - for(GlobalOrdinal i = 0; i < nNodes; i++) { + for (GlobalOrdinal i = 0; i < nNodes; i++) { int data; data_file >> data; nodalGlobals[i] = Teuchos::as(data); } + } catch (const std::exception& e) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Problem opening/reading file " << ss.str()); } - catch(const std::exception& e) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError,"Problem opening/reading file " << ss.str()); - } - for(GlobalOrdinal i = 0; i < nNodes; i++) { + for (GlobalOrdinal i = 0; i < nNodes; i++) { nodalGlobals[i] = nodalGlobals[i] - 1; } } - Teuchos::RCP LinearMap = MapFactory::Build(lib,nGlobalNodes,nodalGlobals(),0,comm); - //LinearMap->describe(out, Teuchos::VERB_EXTREME);LapMap->describe(out, 
Teuchos::VERB_EXTREME); + Teuchos::RCP LinearMap = MapFactory::Build(lib, nGlobalNodes, nodalGlobals(), 0, comm); + // LinearMap->describe(out, Teuchos::VERB_EXTREME);LapMap->describe(out, Teuchos::VERB_EXTREME); - //LapMap->describe(out, Teuchos::VERB_EXTREME);LapMap->describe(out, Teuchos::VERB_EXTREME); + // LapMap->describe(out, Teuchos::VERB_EXTREME);LapMap->describe(out, Teuchos::VERB_EXTREME); Teuchos::RCP Importer = ImportFactory::Build(LinearMap, LapMap); - Teuchos::RCP xpetraXXX = MultiVectorFactory::Build(LapMap,1); - Teuchos::RCP xpetraYYY = MultiVectorFactory::Build(LapMap,1); + Teuchos::RCP xpetraXXX = MultiVectorFactory::Build(LapMap, 1); + Teuchos::RCP xpetraYYY = MultiVectorFactory::Build(LapMap, 1); - RCP temp = IO::ReadMultiVector ("xxx.mm", LinearMap); + RCP temp = IO::ReadMultiVector("xxx.mm", LinearMap); xpetraXXX->doImport(*temp, *Importer, Xpetra::INSERT); - temp = IO::ReadMultiVector ("yyy.mm", LinearMap); + temp = IO::ReadMultiVector("yyy.mm", LinearMap); xpetraYYY->doImport(*temp, *Importer, Xpetra::INSERT); - Teuchos::RCP coordinates = MultiVectorFactory::Build(LapMap,2); - Teuchos::ArrayRCP< const Scalar > srcX = xpetraXXX->getData(0); - Teuchos::ArrayRCP< const Scalar > srcY = xpetraYYY->getData(0); - Teuchos::ArrayRCP< Scalar > dataX = coordinates->getDataNonConst(0); - Teuchos::ArrayRCP< Scalar > dataY = coordinates->getDataNonConst(1); - for(decltype(coordinates->getLocalLength()) i = 0; i < coordinates->getLocalLength(); i++) { + Teuchos::RCP coordinates = MultiVectorFactory::Build(LapMap, 2); + Teuchos::ArrayRCP srcX = xpetraXXX->getData(0); + Teuchos::ArrayRCP srcY = xpetraYYY->getData(0); + Teuchos::ArrayRCP dataX = coordinates->getDataNonConst(0); + Teuchos::ArrayRCP dataY = coordinates->getDataNonConst(1); + for (decltype(coordinates->getLocalLength()) i = 0; i < coordinates->getLocalLength(); i++) { dataX[i] = srcX[i]; dataY[i] = srcY[i]; } // read in matrix - Teuchos::RCP DistributedMap = Teuchos::null; - Teuchos::RCP 
DistributedMatrix = Teuchos::null; + Teuchos::RCP DistributedMap = Teuchos::null; + Teuchos::RCP DistributedMatrix = Teuchos::null; // read in matrix to determine number of rows - Teuchos::RCP SerialMatrix = Xpetra::IO::Read("theMatrix.m",lib,comm); - GlobalOrdinal NumRows = SerialMatrix->getRowMap()->getGlobalNumElements(); + Teuchos::RCP SerialMatrix = Xpetra::IO::Read("theMatrix.m", lib, comm); + GlobalOrdinal NumRows = SerialMatrix->getRowMap()->getGlobalNumElements(); // re-read in matrix and distribute it using the user-given distribution over processors - DistributedMap = MapFactory::Build(lib,NumRows,dofGlobals(),0,comm); - DistributedMatrix = Xpetra::IO::Read("theMatrix.m", - DistributedMap, - Teuchos::null, - DistributedMap, - DistributedMap, - true, // callFillComplete = true, - false, // binary = false, - false, // tolerant = false, - false); // debug = false) + DistributedMap = MapFactory::Build(lib, NumRows, dofGlobals(), 0, comm); + DistributedMatrix = Xpetra::IO::Read("theMatrix.m", + DistributedMap, + Teuchos::null, + DistributedMap, + DistributedMap, + true, // callFillComplete = true, + false, // binary = false, + false, // tolerant = false, + false); // debug = false) // read in global vectors (e.g. 
rhs) GlobalOrdinal nGlobalDof = 0; GlobalOrdinal nLocalDofs = Teuchos::as(nDofs); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_SUM,comm->getSize(),&nLocalDofs,&nGlobalDof); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, comm->getSize(), &nLocalDofs, &nGlobalDof); Teuchos::RCP dofLinearMap = Teuchos::null; { - std::stringstream ss; ss << comm->getSize() << "ProcLinear" << comm->getRank(); - try{ + std::stringstream ss; + ss << comm->getSize() << "ProcLinear" << comm->getRank(); + try { std::ifstream data_file(ss.str()); // loop over all local nodes data_file >> dofGlobals[0]; + } catch (const std::exception& e) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Problem opening/reading file " << ss.str()); } - catch(const std::exception& e) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError,"Problem opening/reading file " << ss.str()); - } - for(decltype(nDofs) i = 0; i < nDofs; i++) dofGlobals[i] = i + dofGlobals[0]; - for(decltype(nDofs) i = 0; i < nDofs; i++) dofGlobals[i] = dofGlobals[i] - 1; - dofLinearMap = MapFactory::Build(lib,nGlobalDof,dofGlobals(),0,comm); + for (decltype(nDofs) i = 0; i < nDofs; i++) dofGlobals[i] = i + dofGlobals[0]; + for (decltype(nDofs) i = 0; i < nDofs; i++) dofGlobals[i] = dofGlobals[i] - 1; + dofLinearMap = MapFactory::Build(lib, nGlobalDof, dofGlobals(), 0, comm); } Teuchos::RCP dofImporter = ImportFactory::Build(dofLinearMap, DistributedMap); - RCP RHS = MultiVectorFactory::Build(DistributedMap,1); - RCP LHS = MultiVectorFactory::Build(DistributedMap,1); + RCP RHS = MultiVectorFactory::Build(DistributedMap, 1); + RCP LHS = MultiVectorFactory::Build(DistributedMap, 1); - temp = IO::ReadMultiVector ("rhs.mm", dofLinearMap); + temp = IO::ReadMultiVector("rhs.mm", dofLinearMap); RHS->doImport(*temp, *dofImporter, Xpetra::INSERT); LHS->putScalar(zero); @@ -383,9 +377,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg Teuchos::ParameterList paramList; 
Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), *comm); - const std::string userName = "user data"; + const std::string userName = "user data"; Teuchos::ParameterList& userParamList = paramList.sublist(userName); - userParamList.set("multivector Coordinates",coordinates); + userParamList.set("multivector Coordinates", coordinates); userParamList.set("ArrayRCP DofPresent", dofPresent); RCP H; @@ -395,15 +389,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg { tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 4 - Belos Solve"))); // Operator and Multivector type that will be used with Belos - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; H->IsPreconditioner(true); // Define Operator and Preconditioner - Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(DistributedMatrix)); // Turns a Xpetra::Matrix object into a Belos operator - Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(DistributedMatrix)); // Turns a Xpetra::Matrix object into a Belos operator + Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP< Belos::LinearProblem > belosProblem = rcp(new Belos::LinearProblem(belosOp, LHS, RHS)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, LHS, RHS)); belosProblem->setRightPrec(belosPrec); bool set = belosProblem->setProblem(); if (set == false) { @@ -413,14 +407,14 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Belos parameter list Teuchos::ParameterList belosList; - belosList.set("Maximum Iterations", 300); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", 1e-8); // Relative convergence tolerance requested - 
belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList.set("Output Frequency", 1); - belosList.set("Output Style", Belos::Brief); + belosList.set("Maximum Iterations", 300); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", 1e-8); // Relative convergence tolerance requested + belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList.set("Output Frequency", 1); + belosList.set("Output Style", Belos::Brief); // Create an iterative solver manager - RCP< Belos::SolverManager > solver; + RCP > solver; solver = rcp(new Belos::PseudoBlockGmresSolMgr(belosProblem, rcp(&belosList, false))); // Perform solve @@ -431,14 +425,17 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Get the number of iterations for this solve. out << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; - } catch(...) { - out << std::endl << "ERROR: Belos threw an error! " << std::endl; + } catch (...) { + out << std::endl + << "ERROR: Belos threw an error! " << std::endl; } // Check convergence if (ret != Belos::Converged) - out << std::endl << "ERROR: Belos did not converge or a warning occured! " << std::endl; + out << std::endl + << "ERROR: Belos did not converge or a warning occured! " << std::endl; else - out << std::endl << "SUCCESS: Belos converged!" << std::endl; + out << std::endl + << "SUCCESS: Belos converged!" << std::endl; } #endif @@ -446,14 +443,14 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); -} //main + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); +} // main int main(int argc, char* argv[]) { bool success = false; bool verbose = true; - Teuchos::GlobalMPISession mpiSession(&argc,&argv); + Teuchos::GlobalMPISession mpiSession(&argc, &argv); try { const bool throwExceptions = false; @@ -462,20 +459,21 @@ int main(int argc, char* argv[]) { Teuchos::CommandLineProcessor clp(throwExceptions, recogniseAllOptions); Xpetra::Parameters xpetraParameters(clp); - std::string node = ""; clp.setOption("node", &node, "node type (serial | openmp | cuda | hip)"); + std::string node = ""; + clp.setOption("node", &node, "node type (serial | openmp | cuda | hip)"); switch (clp.parse(argc, argv, NULL)) { - case Teuchos::CommandLineProcessor::PARSE_ERROR: return EXIT_FAILURE; + case Teuchos::CommandLineProcessor::PARSE_ERROR: return EXIT_FAILURE; case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } Xpetra::UnderlyingLib lib = xpetraParameters.GetLib(); if (lib == Xpetra::UseEpetra) { #ifdef HAVE_MUELU_EPETRA - return main_(clp, lib, argc, argv); + return main_(clp, lib, argc, argv); #else throw MueLu::Exceptions::RuntimeError("Epetra is not available"); #endif @@ -486,35 +484,35 @@ int main(int argc, char* argv[]) { typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node; #ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION - return main_(clp, lib, argc, argv); + return main_(clp, lib, argc, argv); +#else +#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_INT_INT) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_INT_LONG) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) + return main_(clp, lib, argc, argv); #else -# if defined(HAVE_TPETRA_INST_DOUBLE) && 
defined(HAVE_TPETRA_INST_INT_INT) - return main_ (clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_INT_LONG) - return main_(clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) - return main_(clp, lib, argc, argv); -# else throw MueLu::Exceptions::RuntimeError("Found no suitable instantiation"); -# endif +#endif #endif } else if (node == "serial") { #ifdef KOKKOS_ENABLE_SERIAL typedef Tpetra::KokkosCompat::KokkosSerialWrapperNode Node; -# ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION - return main_(clp, lib, argc, argv); -# else -# if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT) - return main_ (clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG) - return main_(clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) - return main_(clp, lib, argc, argv); -# else +#ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION + return main_(clp, lib, argc, argv); +#else +#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_INT) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SERIAL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) + return main_(clp, lib, argc, argv); +#else throw MueLu::Exceptions::RuntimeError("Found no suitable instantiation"); -# endif -# endif +#endif +#endif #else throw MueLu::Exceptions::RuntimeError("Serial node type is disabled"); #endif @@ -522,19 +520,19 @@ int main(int argc, char* argv[]) { #ifdef KOKKOS_ENABLE_OPENMP typedef Tpetra::KokkosCompat::KokkosOpenMPWrapperNode Node; -# ifndef 
HAVE_MUELU_EXPLICIT_INSTANTIATION - return main_(clp, argc, argv); -# else -# if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_INT) - return main_ (clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_LONG) - return main_(clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) - return main_(clp, lib, argc, argv); -# else +#ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION + return main_(clp, argc, argv); +#else +#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_INT) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_LONG) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_OPENMP) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) + return main_(clp, lib, argc, argv); +#else throw MueLu::Exceptions::RuntimeError("Found no suitable instantiation"); -# endif -# endif +#endif +#endif #else throw MueLu::Exceptions::RuntimeError("OpenMP node type is disabled"); #endif @@ -542,19 +540,19 @@ int main(int argc, char* argv[]) { #ifdef KOKKOS_ENABLE_CUDA typedef Tpetra::KokkosCompat::KokkosCudaWrapperNode Node; -# ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION - return main_(clp, argc, argv); -# else -# if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_CUDA) && defined(HAVE_TPETRA_INST_INT_INT) - return main_ (clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_CUDA) && defined(HAVE_TPETRA_INST_INT_LONG) - return main_(clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_CUDA) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) - return main_(clp, lib, argc, argv); -# else +#ifndef 
HAVE_MUELU_EXPLICIT_INSTANTIATION + return main_(clp, argc, argv); +#else +#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_CUDA) && defined(HAVE_TPETRA_INST_INT_INT) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_CUDA) && defined(HAVE_TPETRA_INST_INT_LONG) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_CUDA) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) + return main_(clp, lib, argc, argv); +#else throw MueLu::Exceptions::RuntimeError("Found no suitable instantiation"); -# endif -# endif +#endif +#endif #else throw MueLu::Exceptions::RuntimeError("CUDA node type is disabled"); #endif @@ -562,19 +560,19 @@ int main(int argc, char* argv[]) { #ifdef KOKKOS_ENABLE_HIP typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node; -# ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION - return main_(clp, argc, argv); -# else -# if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_HIP) && defined(HAVE_TPETRA_INST_INT_INT) - return main_ (clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_HIP) && defined(HAVE_TPETRA_INST_INT_LONG) - return main_(clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_HIP) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) - return main_(clp, lib, argc, argv); -# else +#ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION + return main_(clp, argc, argv); +#else +#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_HIP) && defined(HAVE_TPETRA_INST_INT_INT) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_HIP) && defined(HAVE_TPETRA_INST_INT_LONG) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_HIP) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) + return main_(clp, lib, argc, argv); +#else throw MueLu::Exceptions::RuntimeError("Found no 
suitable instantiation"); -# endif -# endif +#endif +#endif #else throw MueLu::Exceptions::RuntimeError("HIP node type is disabled"); #endif @@ -582,22 +580,22 @@ int main(int argc, char* argv[]) { #ifdef KOKKOS_ENABLE_SYCL typedef Tpetra::KokkosCompatKokkosSYCLWrapperNode Node; -# ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION - return main_(clp, argc, argv); -# else -# if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SYCL) && defined(HAVE_TPETRA_INST_INT_INT) - return main_ (clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SYCL) && defined(HAVE_TPETRA_INST_INT_LONG) - return main_(clp, lib, argc, argv); -# elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SYCL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) - return main_(clp, lib, argc, argv); -# else +#ifndef HAVE_MUELU_EXPLICIT_INSTANTIATION + return main_(clp, argc, argv); +#else +#if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SYCL) && defined(HAVE_TPETRA_INST_INT_INT) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SYCL) && defined(HAVE_TPETRA_INST_INT_LONG) + return main_(clp, lib, argc, argv); +#elif defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_SYCL) && defined(HAVE_TPETRA_INST_INT_LONG_LONG) + return main_(clp, lib, argc, argv); +#else throw MueLu::Exceptions::RuntimeError("Found no suitable instantiation"); -# endif -# endif +#endif +#endif #else throw MueLu::Exceptions::RuntimeError("SYCL node type is disabled"); -#endif +#endif } else { throw MueLu::Exceptions::RuntimeError("Unrecognized node type"); } @@ -605,6 +603,5 @@ int main(int argc, char* argv[]) { } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); + return (success ? 
EXIT_SUCCESS : EXIT_FAILURE); } - diff --git a/packages/muelu/research/tawiesn/crada/Driver.cpp b/packages/muelu/research/tawiesn/crada/Driver.cpp index 60d7d410adb9..f6efdc1886a8 100644 --- a/packages/muelu/research/tawiesn/crada/Driver.cpp +++ b/packages/muelu/research/tawiesn/crada/Driver.cpp @@ -70,7 +70,7 @@ #include #include #include -#include // TODO: move into MueLu.hpp +#include // TODO: move into MueLu.hpp #include #include @@ -83,95 +83,90 @@ #include #include #include -#include // => This header defines Belos::XpetraOp -#include // => This header defines Belos::MueLuOp +#include // => This header defines Belos::XpetraOp +#include // => This header defines Belos::MueLuOp #endif - template - class ExportVTK : public MueLu::VisualizationHelpers { - public: - using real_type = typename Teuchos::ScalarTraits::coordinateType; - using RealValuedMultiVector = typename Xpetra::MultiVector; - - ExportVTK() {}; - - public: - - void writeFile(std::ofstream& fout, Teuchos::RCP& coordinates, Teuchos::RCP >& sol) - { - using namespace std; - typedef MueLu::VisualizationHelpers VH; - Teuchos::RCP > nodeMap = coordinates->getMap(); - std::vector vertices; - std::vector geomSize; - LocalOrdinal numFineNodes = Teuchos::as(coordinates->getLocalLength()); - - vertices.reserve(numFineNodes); - geomSize.reserve(numFineNodes); - for(LocalOrdinal i = 0; i < numFineNodes; i++) - { - vertices.push_back(i); - geomSize.push_back(1); - } - - Teuchos::ArrayRCP xCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(0)); - Teuchos::ArrayRCP yCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(1)); - Teuchos::ArrayRCP zCoords = Teuchos::null; - if(coordinates->getNumVectors() == 3) { - zCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(2)); - } - Teuchos::ArrayRCP solData = Teuchos::arcp_reinterpret_cast(sol->getData(0)); - - std::vector uniqueFine = this->makeUnique(vertices); - this->writeFileVTKOpening(fout, uniqueFine, geomSize); - 
this->writeFileVTKNodes(fout, uniqueFine, nodeMap); - - std::string indent = " "; - fout << " " << std::endl; - fout << indent; - int myRank = coordinates->getMap()->getComm()->getRank(); - for(int i = 0; i < int(uniqueFine.size()); i++) - { - fout << myRank << " "; - if(i % 20 == 19) - fout << std::endl << indent; - } - fout << std::endl; - fout << " " << std::endl; - // solution vector - fout << " " << std::endl; - fout << indent; - for(int i = 0; i < int(uniqueFine.size()); i++) - { - size_t numVec = coordinates->getNumVectors(); - for(int d=0; d<(int)numVec; d++) - fout << solData[i*numVec+d] << " "; - if(i % 3 == 0) - fout << std::endl << indent; - } - fout << std::endl; - fout << " " << std::endl; - fout << " " << std::endl; // that is annoying - - this->writeFileVTKCoordinates(fout, uniqueFine, xCoords, yCoords, zCoords, coordinates->getNumVectors()); - this->writeFileVTKCells(fout, uniqueFine, vertices, geomSize); - this->writeFileVTKClosing(fout); - fout.close(); - - }; +template +class ExportVTK : public MueLu::VisualizationHelpers { + public: + using real_type = typename Teuchos::ScalarTraits::coordinateType; + using RealValuedMultiVector = typename Xpetra::MultiVector; + + ExportVTK(){}; + + public: + void writeFile(std::ofstream& fout, Teuchos::RCP& coordinates, Teuchos::RCP >& sol) { + using namespace std; + typedef MueLu::VisualizationHelpers VH; + Teuchos::RCP > nodeMap = coordinates->getMap(); + std::vector vertices; + std::vector geomSize; + LocalOrdinal numFineNodes = Teuchos::as(coordinates->getLocalLength()); + + vertices.reserve(numFineNodes); + geomSize.reserve(numFineNodes); + for (LocalOrdinal i = 0; i < numFineNodes; i++) { + vertices.push_back(i); + geomSize.push_back(1); + } + Teuchos::ArrayRCP xCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(0)); + Teuchos::ArrayRCP yCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(1)); + Teuchos::ArrayRCP zCoords = Teuchos::null; + if (coordinates->getNumVectors() == 3) { + 
zCoords = Teuchos::arcp_reinterpret_cast(coordinates->getData(2)); + } + Teuchos::ArrayRCP solData = Teuchos::arcp_reinterpret_cast(sol->getData(0)); + + std::vector uniqueFine = this->makeUnique(vertices); + this->writeFileVTKOpening(fout, uniqueFine, geomSize); + this->writeFileVTKNodes(fout, uniqueFine, nodeMap); + + std::string indent = " "; + fout << " " << std::endl; + fout << indent; + int myRank = coordinates->getMap()->getComm()->getRank(); + for (int i = 0; i < int(uniqueFine.size()); i++) { + fout << myRank << " "; + if (i % 20 == 19) + fout << std::endl + << indent; + } + fout << std::endl; + fout << " " << std::endl; + // solution vector + fout << " " << std::endl; + fout << indent; + for (int i = 0; i < int(uniqueFine.size()); i++) { + size_t numVec = coordinates->getNumVectors(); + for (int d = 0; d < (int)numVec; d++) + fout << solData[i * numVec + d] << " "; + if (i % 3 == 0) + fout << std::endl + << indent; + } + fout << std::endl; + fout << " " << std::endl; + fout << " " << std::endl; // that is annoying + + this->writeFileVTKCoordinates(fout, uniqueFine, xCoords, yCoords, zCoords, coordinates->getNumVectors()); + this->writeFileVTKCells(fout, uniqueFine, vertices, geomSize); + this->writeFileVTKClosing(fout); + fout.close(); }; +}; -template -int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { +template +int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib lib, int argc, char* argv[]) { #include - using Teuchos::RCP; // reference count pointers + using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; - using TST = Teuchos::ScalarTraits; - using MT = typename Teuchos::ScalarTraits::magnitudeType; - using real_type = typename Teuchos::ScalarTraits::coordinateType; + using TST = Teuchos::ScalarTraits; + using MT = typename Teuchos::ScalarTraits::magnitudeType; + using real_type = typename Teuchos::ScalarTraits::coordinateType; using 
RealValuedMultiVector = Xpetra::MultiVector; // ========================================================================= // MPI initialization using Teuchos @@ -180,7 +175,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg bool success = false; bool verbose = true; try { - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); + RCP > comm = Teuchos::DefaultComm::getComm(); // ========================================================================= // Convenient definitions @@ -189,44 +184,65 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Instead of checking each time for rank, create a rank 0 stream RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - Teuchos::FancyOStream& fancyout = *fancy; + Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= - //GO nx = 100, ny = 100, nz = 100; - //Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case - Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra - - std::string xmlFileName = "driver.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default 'scalingTest.xml'"); - std::string belosFileName = ""; clp.setOption("belosXml", &belosFileName, "read parameters for Belos from a file."); - int amgAsPrecond = 1; clp.setOption("precond", &amgAsPrecond, "apply multigrid as preconditioner"); - int amgAsSolver = 0; clp.setOption("fixPoint", &amgAsSolver, "apply multigrid as solver"); - bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); - int blockedSystem = 1; clp.setOption("split", &blockedSystem, "split system matrix into 2x2 system (default=0)"); - int useThyraGIDs = 0; clp.setOption("thyra", &useThyraGIDs, "use Thyra style numbering of GIDs."); - int writeMatricesOPT = -2; clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); - double tol = 1e-6; clp.setOption("tol", &tol, "solver convergence tolerance"); - std::string krylovMethod = "gmres"; clp.setOption("krylov", &krylovMethod, "outer Krylov method"); - int maxIts = 100; clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); - int output = 1; clp.setOption("output", &output, "how often to print Krylov residual history"); - std::string matrixFileName = "crada1/crada_A.mm"; clp.setOption("matrixfile", &matrixFileName, "matrix market file containing matrix"); - std::string rhsFileName = "crada1/crada_b.mm"; clp.setOption("rhsfile", &rhsFileName, "matrix market file containing right-hand side"); - std::string nspFileName = "crada1/crada_ns.mm"; clp.setOption("nspfile", &nspFileName, "matrix market file containing fine level null space"); - std::string cooFileName = "crada1/crada_coordinates.mm"; clp.setOption("coordinatesfile",&cooFileName, "matrix market file containing fine level coordinates"); - std::string spcFileName = "crada1/crada_special.mm"; clp.setOption("specialfile", &spcFileName, "matrix market file containing fine level special dofs"); - int nPDE = 3; clp.setOption("numpdes", &nPDE, 
"number of PDE equations"); - int nNspVectors = 6; clp.setOption("numnsp", &nNspVectors, "number of nullspace vectors. Only used if null space is read from file. Must be smaller or equal than the number of null space vectors read in from file."); - std::string convType = "r0"; clp.setOption("convtype", &convType, "convergence type (r0 or none)"); - std::string strOutputFilename = ""; clp.setOption("output", &strOutputFilename,"filename prefix for output file name. If empty, no output is written."); - - switch (clp.parse(argc,argv)) { - case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; + // GO nx = 100, ny = 100, nz = 100; + // Galeri::Xpetra::Parameters matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra + + std::string xmlFileName = "driver.xml"; + clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'scalingTest.xml'"); + std::string belosFileName = ""; + clp.setOption("belosXml", &belosFileName, "read parameters for Belos from a file."); + int amgAsPrecond = 1; + clp.setOption("precond", &amgAsPrecond, "apply multigrid as preconditioner"); + int amgAsSolver = 0; + clp.setOption("fixPoint", &amgAsSolver, "apply multigrid as solver"); + bool printTimings = true; + clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); + int blockedSystem = 1; + clp.setOption("split", &blockedSystem, "split system matrix into 2x2 system (default=0)"); + int useThyraGIDs = 0; + clp.setOption("thyra", &useThyraGIDs, "use Thyra style numbering of GIDs."); + int writeMatricesOPT = -2; + clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); + double tol = 1e-6; + clp.setOption("tol", &tol, "solver convergence tolerance"); + std::string krylovMethod = "gmres"; + clp.setOption("krylov", &krylovMethod, "outer Krylov 
method"); + int maxIts = 100; + clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); + int output = 1; + clp.setOption("output", &output, "how often to print Krylov residual history"); + std::string matrixFileName = "crada1/crada_A.mm"; + clp.setOption("matrixfile", &matrixFileName, "matrix market file containing matrix"); + std::string rhsFileName = "crada1/crada_b.mm"; + clp.setOption("rhsfile", &rhsFileName, "matrix market file containing right-hand side"); + std::string nspFileName = "crada1/crada_ns.mm"; + clp.setOption("nspfile", &nspFileName, "matrix market file containing fine level null space"); + std::string cooFileName = "crada1/crada_coordinates.mm"; + clp.setOption("coordinatesfile", &cooFileName, "matrix market file containing fine level coordinates"); + std::string spcFileName = "crada1/crada_special.mm"; + clp.setOption("specialfile", &spcFileName, "matrix market file containing fine level special dofs"); + int nPDE = 3; + clp.setOption("numpdes", &nPDE, "number of PDE equations"); + int nNspVectors = 6; + clp.setOption("numnsp", &nNspVectors, "number of nullspace vectors. Only used if null space is read from file. Must be smaller or equal than the number of null space vectors read in from file."); + std::string convType = "r0"; + clp.setOption("convtype", &convType, "convergence type (r0 or none)"); + std::string strOutputFilename = ""; + clp.setOption("output", &strOutputFilename, "filename prefix for output file name. 
If empty, no output is written."); + + switch (clp.parse(argc, argv)) { + case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; - case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; + case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // ========================================================================= @@ -240,42 +256,41 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP A = Teuchos::null; if (matrixFileName != "") { fancyout << "Read matrix from file " << matrixFileName << std::endl; - RCP Atest = Xpetra::IO::Read(std::string(matrixFileName), xpetraParameters.GetLib(), comm); - RCP maptest = Atest->getRowMap(); + RCP Atest = Xpetra::IO::Read(std::string(matrixFileName), xpetraParameters.GetLib(), comm); + RCP maptest = Atest->getRowMap(); // re-read matrix and make sure it is properly distributed over all processors // make sure that all DOF rows per node are located on same processor. 
GO numGlobalNodes = maptest->getGlobalNumElements() / nPDE; GO numMyNodes = numGlobalNodes / comm->getSize(); - if (comm->getRank() == comm->getSize()-1) - numMyNodes = numGlobalNodes - (comm->getSize()-1) * numMyNodes; + if (comm->getRank() == comm->getSize() - 1) + numMyNodes = numGlobalNodes - (comm->getSize() - 1) * numMyNodes; LO numMyDofs = Teuchos::as(numMyNodes) * nPDE; // construct new row map for equally distributing the matrix rows without // splitting dofs that belong to the same node Teuchos::Array myDofGIDs(numMyDofs); - for(LO i = 0; i < numMyDofs; i++) { - if (comm->getRank() == comm->getSize()-1) + for (LO i = 0; i < numMyDofs; i++) { + if (comm->getRank() == comm->getSize() - 1) myDofGIDs[i] = Teuchos::as(maptest->getGlobalNumElements()) - Teuchos::as(numMyDofs - i); else myDofGIDs[i] = Teuchos::as(comm->getRank() * numMyDofs) + Teuchos::as(i); } - RCP Arowmap = MapFactory::Build (xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),myDofGIDs(),0,comm); + RCP Arowmap = MapFactory::Build(xpetraParameters.GetLib(), Teuchos::OrdinalTraits::invalid(), myDofGIDs(), 0, comm); - A = Xpetra::IO::Read(matrixFileName,Arowmap); + A = Xpetra::IO::Read(matrixFileName, Arowmap); A->SetFixedBlockSize(Teuchos::as(nPDE)); } - RCP map = A->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(A->getDomainMap(),nPDE); + RCP map = A->getRowMap(); + RCP nullspace = MultiVectorFactory::Build(A->getDomainMap(), nPDE); A->SetFixedBlockSize(nPDE); fancyout << "#pdes = " << A->GetFixedBlockSize() << std::endl; if (nspFileName != "") { fancyout << "Read null space from file " << nspFileName << std::endl; - - nullspace = Xpetra::IO::ReadMultiVector(std::string(nspFileName), A->getRowMap()); - //nullspace = MultiVectorFactory::Build(A->getRowMap(),6);//haq + nullspace = Xpetra::IO::ReadMultiVector(std::string(nspFileName), A->getRowMap()); + // nullspace = MultiVectorFactory::Build(A->getRowMap(),6);//haq fancyout << "Found " << nullspace->getNumVectors() << " null 
space vectors" << std::endl; if (nNspVectors > Teuchos::as(nullspace->getNumVectors())) { fancyout << "Set number of null space vectors from " << nNspVectors << " to " << nullspace->getNumVectors() << " as only " << nullspace->getNumVectors() << " are provided by " << nspFileName << std::endl; @@ -286,11 +301,11 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg nNspVectors = nullspace->getNumVectors(); } if (nNspVectors < Teuchos::as(nullspace->getNumVectors())) { - RCP temp = MultiVectorFactory::Build(A->getDomainMap(),nNspVectors); - for(int j=0; j tempData = temp->getDataNonConst(j); - Teuchos::ArrayRCP nsData = nullspace->getData(j); - for (int i=0; i temp = MultiVectorFactory::Build(A->getDomainMap(), nNspVectors); + for (int j = 0; j < nNspVectors; j++) { + Teuchos::ArrayRCP tempData = temp->getDataNonConst(j); + Teuchos::ArrayRCP nsData = nullspace->getData(j); + for (int i = 0; i < nsData.size(); ++i) { tempData[i] = nsData[i]; } } @@ -299,46 +314,46 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } } else { if (nPDE == 1) - nullspace->putScalar( Teuchos::ScalarTraits::one() ); + nullspace->putScalar(Teuchos::ScalarTraits::one()); else { - for (int i=0; i nsData = nullspace->getDataNonConst(i); - for (int j=0; jgetDomainMap()->getGlobalElement(j) - A->getDomainMap()->getIndexBase(); - if ((gel-i) % nPDE == 0) + if ((gel - i) % nPDE == 0) nsData[j] = Teuchos::ScalarTraits::one(); } } } } - RCP coordinates = Teuchos::null; //MultiVectorFactory::Build(A->getDomainMap(),1); + RCP coordinates = Teuchos::null; // MultiVectorFactory::Build(A->getDomainMap(),1); if (cooFileName != "") { TEUCHOS_TEST_FOR_EXCEPTION(map->getLocalNumElements() % A->GetFixedBlockSize() != 0, MueLu::Exceptions::RuntimeError, "Driver: Number of DOFs on proc " << comm->getRank() << " is " << map->getLocalNumElements() << " and not divisible by 3."); - Teuchos::ArrayView dofGidList = map->getLocalElementList(); - 
GlobalOrdinal indexBase = map->getIndexBase(); - LocalOrdinal blkSize = A->GetFixedBlockSize(); + Teuchos::ArrayView dofGidList = map->getLocalElementList(); + GlobalOrdinal indexBase = map->getIndexBase(); + LocalOrdinal blkSize = A->GetFixedBlockSize(); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(dofGidList.size()) != Teuchos::as(map->getLocalNumElements()), MueLu::Exceptions::RuntimeError, "Driver: Number of local DOFs inconsistent."); - size_t numNodes = dofGidList.size() / blkSize; - Teuchos::Array nodeList(numNodes); + size_t numNodes = dofGidList.size() / blkSize; + Teuchos::Array nodeList(numNodes); // Amalgamate the map for (LO i = 0; i < Teuchos::as(numNodes); i++) - nodeList[i] = (dofGidList[i*blkSize]-indexBase)/blkSize + indexBase; + nodeList[i] = (dofGidList[i * blkSize] - indexBase) / blkSize + indexBase; TEUCHOS_TEST_FOR_EXCEPTION(dofGidList.size() / blkSize != nodeList.size(), MueLu::Exceptions::RuntimeError, "Driver: Number of local DOFs and local Nodes inconsistent."); GO gCntGIDs = 0; GO glCntGIDs = Teuchos::as(nodeList.size()); - MueLu_sumAll(comm,glCntGIDs,gCntGIDs); + MueLu_sumAll(comm, glCntGIDs, gCntGIDs); - //Teuchos::Array eltList(myGIDs); - RCP myCoordMap = MapFactory::Build (xpetraParameters.GetLib(),gCntGIDs,nodeList(),indexBase,comm); + // Teuchos::Array eltList(myGIDs); + RCP myCoordMap = MapFactory::Build(xpetraParameters.GetLib(), gCntGIDs, nodeList(), indexBase, comm); fancyout << "Read fine level coordinates from file " << cooFileName << std::endl; - coordinates = Xpetra::IO::ReadMultiVector(std::string(cooFileName), myCoordMap); + coordinates = Xpetra::IO::ReadMultiVector(std::string(cooFileName), myCoordMap); fancyout << "Found " << coordinates->getNumVectors() << " coordinate vectors of length " << myCoordMap->getGlobalNumElements() << std::endl; /*TEUCHOS_TEST_FOR_EXCEPTION(myCoordMap->getMinGlobalIndex() != map->getMinGlobalIndex() / blkSize, MueLu::Exceptions::RuntimeError, "Driver: Inconsistent minGlobalIndex on proc " << 
comm->getRank()); @@ -354,55 +369,55 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg std::string line; Teuchos::Array mySpecialGids; Teuchos::Array nonSpecialGids; - GlobalOrdinal cnt = 0; // count overall number of gids - //GlobalOrdinal mycnt = 0; // count only local gids - while ( std::getline(infile, line)) { - if(0 == line.find("%")) continue; - if(0 == line.find(" ")) { + GlobalOrdinal cnt = 0; // count overall number of gids + // GlobalOrdinal mycnt = 0; // count only local gids + while (std::getline(infile, line)) { + if (0 == line.find("%")) continue; + if (0 == line.find(" ")) { cnt++; GlobalOrdinal gid; std::istringstream iss(line); iss >> gid; - gid--; // note, that the matlab vector starts counting at 1 and not 0! - if(map->isNodeGlobalElement(gid)) { + gid--; // note, that the matlab vector starts counting at 1 and not 0! + if (map->isNodeGlobalElement(gid)) { mySpecialGids.push_back(gid); - //mycnt++; + // mycnt++; } } } std::vector mySpecialNodeGids; - for(size_t k = 0; k < Teuchos::as(mySpecialGids.size()); k++) { - mySpecialNodeGids.push_back(mySpecialGids[k]/3); + for (size_t k = 0; k < Teuchos::as(mySpecialGids.size()); k++) { + mySpecialNodeGids.push_back(mySpecialGids[k] / 3); } - std::sort(mySpecialNodeGids.begin(),mySpecialNodeGids.end()); + std::sort(mySpecialNodeGids.begin(), mySpecialNodeGids.end()); mySpecialNodeGids.erase(std::unique(mySpecialNodeGids.begin(), mySpecialNodeGids.end()), mySpecialNodeGids.end()); cnt = 0; Teuchos::Array myFinalSpecialGids; - for(size_t k = 0; k < mySpecialNodeGids.size(); k++) { - myFinalSpecialGids.push_back(mySpecialNodeGids[k]*3); - myFinalSpecialGids.push_back(mySpecialNodeGids[k]*3+1); - myFinalSpecialGids.push_back(mySpecialNodeGids[k]*3+2); + for (size_t k = 0; k < mySpecialNodeGids.size(); k++) { + myFinalSpecialGids.push_back(mySpecialNodeGids[k] * 3); + myFinalSpecialGids.push_back(mySpecialNodeGids[k] * 3 + 1); + 
myFinalSpecialGids.push_back(mySpecialNodeGids[k] * 3 + 2); cnt += 3; } std::cout << "Number of special gids read: " << mySpecialGids.size() << " Final number of special gids: " << myFinalSpecialGids.size() << std::endl; - //Teuchos::Array eltList(mySpecialGids); - mySpecialMap = MapFactory::Build (xpetraParameters.GetLib(),cnt,myFinalSpecialGids(),0,comm); + // Teuchos::Array eltList(mySpecialGids); + mySpecialMap = MapFactory::Build(xpetraParameters.GetLib(), cnt, myFinalSpecialGids(), 0, comm); // empty processors - std::vector lelePerProc(comm->getSize(),0); - std::vector gelePerProc(comm->getSize(),0); + std::vector lelePerProc(comm->getSize(), 0); + std::vector gelePerProc(comm->getSize(), 0); lelePerProc[comm->getRank()] = mySpecialMap->getLocalNumElements(); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,comm->getSize(),&lelePerProc[0],&gelePerProc[0]); - if(comm->getRank() == 0) { + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, comm->getSize(), &lelePerProc[0], &gelePerProc[0]); + if (comm->getRank() == 0) { fancyout << "Distribution of " << cnt << " special dofs over processors:" << std::endl; fancyout << "Proc #DOFs" << std::endl; - for(int i=0; igetSize(); i++) { - fancyout << i << " " << gelePerProc[i] << std::endl; + for (int i = 0; i < comm->getSize(); i++) { + fancyout << i << " " << gelePerProc[i] << std::endl; } } } @@ -411,15 +426,14 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg tm = Teuchos::null; comm->barrier(); - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1.2 - LHS and RHS initialization"))); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1.2 - LHS and RHS initialization"))); - RCP X = VectorFactory::Build(map,1); - RCP B = VectorFactory::Build(map,1); + RCP X = VectorFactory::Build(map, 1); + RCP B = VectorFactory::Build(map, 1); if (rhsFileName != "") - B = Xpetra::IO::ReadMultiVector(std::string(rhsFileName), A->getRowMap()); - else - { + B = 
Xpetra::IO::ReadMultiVector(std::string(rhsFileName), A->getRowMap()); + else { // we set seed for reproducibility X->setSeed(846930886); bool useSameRandomGen = false; @@ -428,7 +442,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg Teuchos::Array norms(1); B->norm2(norms); - //B->scale(1.0/norms[0]); + // B->scale(1.0/norms[0]); } X->putScalar(zero); @@ -443,7 +457,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1.5 - MueLu read XML and feed MueLu"))); ParameterListInterpreter mueLuFactory(xmlFileName, *comm); - RCP H = mueLuFactory.CreateHierarchy(); // By default, we use Extreme. However, typically the xml file contains verbosity parameter @@ -457,7 +470,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // - split the linear system in a 2x2 multiphysics problem using Xpetra style gids // - split the linear system in a 2x2 multiphysics problem using Thyra style gids // - solve the problem as a monolithic linear system - if(blockedSystem == 1) { + if (blockedSystem == 1) { // split matrix and vectors // create map extractor @@ -474,16 +487,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg std::cout << "non special gids: " << nonSpecialGids.size() << std::endl; - //MapFactory::Build (xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),nonSpecialGids(),0,comm); - std::vector strInfo(1,nPDE); - RCP myStridedNonSpecialMap = StridedMapFactory::Build(xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),nonSpecialGids(),0,strInfo,comm); - RCP myStridedSpecialMap = StridedMapFactory::Build(xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),specialGids() ,0,strInfo,comm); + // MapFactory::Build (xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),nonSpecialGids(),0,comm); + std::vector strInfo(1, nPDE); + RCP myStridedNonSpecialMap = 
StridedMapFactory::Build(xpetraParameters.GetLib(), Teuchos::OrdinalTraits::invalid(), nonSpecialGids(), 0, strInfo, comm); + RCP myStridedSpecialMap = StridedMapFactory::Build(xpetraParameters.GetLib(), Teuchos::OrdinalTraits::invalid(), specialGids(), 0, strInfo, comm); // RCP myStridedSpecialMap = StridedMapFactory::Build(mySpecialMap, strInfo); - //std::cout << "map " << map->getMaxAllGlobalIndex() << " nonspecial " << myStridedNonSpecialMap->getMinAllGlobalIndex() << " " << myStridedNonSpecialMap->getMaxAllGlobalIndex() << " (" << myStridedNonSpecialMap->getGlobalNumElements() << ") special " << mySpecialMap->getMinAllGlobalIndex() << " " << mySpecialMap->getMaxAllGlobalIndex() << "(" << myStridedSpecialMap->getGlobalNumElements() << ")" << std::endl; - //std::cout << Teuchos::rcp_dynamic_cast >(myNonSpecialMap)->getEpetra_Map() << std::endl; - //std::cout << Teuchos::rcp_dynamic_cast >(myStridedSpecialMap)->getEpetra_Map() << std::endl; - + // std::cout << "map " << map->getMaxAllGlobalIndex() << " nonspecial " << myStridedNonSpecialMap->getMinAllGlobalIndex() << " " << myStridedNonSpecialMap->getMaxAllGlobalIndex() << " (" << myStridedNonSpecialMap->getGlobalNumElements() << ") special " << mySpecialMap->getMinAllGlobalIndex() << " " << mySpecialMap->getMaxAllGlobalIndex() << "(" << myStridedSpecialMap->getGlobalNumElements() << ")" << std::endl; + // std::cout << Teuchos::rcp_dynamic_cast >(myNonSpecialMap)->getEpetra_Map() << std::endl; + // std::cout << Teuchos::rcp_dynamic_cast >(myStridedSpecialMap)->getEpetra_Map() << std::endl; // We always build an Xpetra style map extractor with unique global ids TEUCHOS_TEST_FOR_EXCEPTION(map->getLocalNumElements() != myStridedNonSpecialMap->getLocalNumElements() + myStridedSpecialMap->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "Driver: Number of DOFs on proc " << comm->getRank() << " is " << map->getLocalNumElements() << " and does not match the sum of the partial maps of size " << 
myStridedNonSpecialMap->getLocalNumElements() << " and " << myStridedSpecialMap->getLocalNumElements()); @@ -493,15 +505,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg xmaps.push_back(myStridedSpecialMap); // Xpetra mode - Teuchos::RCP > map_extractor = Xpetra::MapExtractorFactory::Build(map,xmaps); + Teuchos::RCP > map_extractor = Xpetra::MapExtractorFactory::Build(map, xmaps); // split null space vectors - RCP nullspace1 = map_extractor->ExtractVector(nullspace,0); - RCP nullspace2 = map_extractor->ExtractVector(nullspace,1); + RCP nullspace1 = map_extractor->ExtractVector(nullspace, 0); + RCP nullspace2 = map_extractor->ExtractVector(nullspace, 1); - bool bThyraMode = (useThyraGIDs==1) ? true : false; - Teuchos::RCP > bOp = - Xpetra::MatrixUtils::SplitMatrix(*A,map_extractor,map_extractor,Teuchos::null,bThyraMode); + bool bThyraMode = (useThyraGIDs == 1) ? true : false; + Teuchos::RCP > bOp = + Xpetra::MatrixUtils::SplitMatrix(*A, map_extractor, map_extractor, Teuchos::null, bThyraMode); // TODO plausibility checks // TODO set number of dofs per node @@ -513,8 +525,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP coordinates1 = Teuchos::null; RCP coordinates2 = Teuchos::null; if (coordinates != Teuchos::null) { - TEUCHOS_TEST_FOR_EXCEPTION( myStridedNonSpecialMap->getLocalNumElements() % 3 != 0, MueLu::Exceptions::RuntimeError, "Driver: Number of DOFs of non-special map on proc " << comm->getRank() << " is " << myStridedNonSpecialMap->getLocalNumElements() << " and cannot divided by 3"); - TEUCHOS_TEST_FOR_EXCEPTION( myStridedSpecialMap->getLocalNumElements() % 3 != 0, MueLu::Exceptions::RuntimeError, "Driver: Number of DOFs of special map on proc " << comm->getRank() << " is " << myStridedSpecialMap->getLocalNumElements() << " and cannot divided by 3"); + TEUCHOS_TEST_FOR_EXCEPTION(myStridedNonSpecialMap->getLocalNumElements() % 3 != 0, MueLu::Exceptions::RuntimeError, "Driver: 
Number of DOFs of non-special map on proc " << comm->getRank() << " is " << myStridedNonSpecialMap->getLocalNumElements() << " and cannot divided by 3"); + TEUCHOS_TEST_FOR_EXCEPTION(myStridedSpecialMap->getLocalNumElements() % 3 != 0, MueLu::Exceptions::RuntimeError, "Driver: Number of DOFs of special map on proc " << comm->getRank() << " is " << myStridedSpecialMap->getLocalNumElements() << " and cannot divided by 3"); Teuchos::Array nonSpecialCoordGids; Teuchos::Array SpecialCoordGids; @@ -530,64 +542,63 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg nonSpecialCoordGids.append(gid); break; } else - TEUCHOS_TEST_FOR_EXCEPTION( true, MueLu::Exceptions::RuntimeError, "Driver: DofGid " << dofgid << " is neither contained in special nor in non-special map."); + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Driver: DofGid " << dofgid << " is neither contained in special nor in non-special map."); } } - RCP myNonSpecialCoordsMap = MapFactory::Build(xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),nonSpecialCoordGids(),0,comm); - TEUCHOS_TEST_FOR_EXCEPTION( myStridedNonSpecialMap->getLocalNumElements() / 3 != myNonSpecialCoordsMap->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "Driver: Number of entries in non-special node map is inconsistent"); + RCP myNonSpecialCoordsMap = MapFactory::Build(xpetraParameters.GetLib(), Teuchos::OrdinalTraits::invalid(), nonSpecialCoordGids(), 0, comm); + TEUCHOS_TEST_FOR_EXCEPTION(myStridedNonSpecialMap->getLocalNumElements() / 3 != myNonSpecialCoordsMap->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "Driver: Number of entries in non-special node map is inconsistent"); - RCP mySpecialCoordsMap = MapFactory::Build(xpetraParameters.GetLib(),Teuchos::OrdinalTraits::invalid(),SpecialCoordGids(),0,comm); - TEUCHOS_TEST_FOR_EXCEPTION( myStridedSpecialMap->getLocalNumElements() / 3 != mySpecialCoordsMap->getLocalNumElements(), 
MueLu::Exceptions::RuntimeError, "Driver: Number of entries in non-special node map is inconsistent"); + RCP mySpecialCoordsMap = MapFactory::Build(xpetraParameters.GetLib(), Teuchos::OrdinalTraits::invalid(), SpecialCoordGids(), 0, comm); + TEUCHOS_TEST_FOR_EXCEPTION(myStridedSpecialMap->getLocalNumElements() / 3 != mySpecialCoordsMap->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "Driver: Number of entries in non-special node map is inconsistent"); std::vector > nodexmaps; nodexmaps.push_back(myNonSpecialCoordsMap); nodexmaps.push_back(mySpecialCoordsMap); - Teuchos::RCP > nodemap_extractor = Xpetra::MapExtractorFactory::Build(coordinates->getMap(),nodexmaps); + Teuchos::RCP > nodemap_extractor = Xpetra::MapExtractorFactory::Build(coordinates->getMap(), nodexmaps); // split coordinate vectors - coordinates1 = nodemap_extractor->ExtractVector(coordinates,0); - coordinates2 = nodemap_extractor->ExtractVector(coordinates,1); - //std::cout << Teuchos::rcp_dynamic_cast >(myNonSpecialCoordsMap)->getEpetra_Map() << std::endl; - //std::cout << Teuchos::rcp_dynamic_cast >(mySpecialCoordsMap)->getEpetra_Map() << std::endl; + coordinates1 = nodemap_extractor->ExtractVector(coordinates, 0); + coordinates2 = nodemap_extractor->ExtractVector(coordinates, 1); + // std::cout << Teuchos::rcp_dynamic_cast >(myNonSpecialCoordsMap)->getEpetra_Map() << std::endl; + // std::cout << Teuchos::rcp_dynamic_cast >(mySpecialCoordsMap)->getEpetra_Map() << std::endl; } - - if(bThyraMode == false) { + if (bThyraMode == false) { // use Xpetra style GIDs - H->GetLevel(0)->Set("A", Teuchos::rcp_dynamic_cast(bOp)); - H->GetLevel(0)->Set("Nullspace1", nullspace1); - H->GetLevel(0)->Set("Nullspace2", nullspace2); - H->GetLevel(0)->Set("Coordinates", coordinates); // TODO split coordinates for rebalancing! (or provide the full vector in the right map and split it in the factories!) 
+ H->GetLevel(0)->Set("A", Teuchos::rcp_dynamic_cast(bOp)); + H->GetLevel(0)->Set("Nullspace1", nullspace1); + H->GetLevel(0)->Set("Nullspace2", nullspace2); + H->GetLevel(0)->Set("Coordinates", coordinates); // TODO split coordinates for rebalancing! (or provide the full vector in the right map and split it in the factories!) H->GetLevel(0)->Set("Coordinates1", coordinates1); H->GetLevel(0)->Set("Coordinates2", coordinates2); - if(mySpecialMap!=Teuchos::null) H->GetLevel(0)->Set("map SpecialMap", mySpecialMap); + if (mySpecialMap != Teuchos::null) H->GetLevel(0)->Set("map SpecialMap", mySpecialMap); } else { // use Thyra style GIDs - RCP nsp1shrinked = Xpetra::MatrixUtils::xpetraGidNumbering2ThyraGidNumbering(*nullspace1); - RCP nsp2shrinked = Xpetra::MatrixUtils::xpetraGidNumbering2ThyraGidNumbering(*nullspace2); - RCP coordsshrinked = Xpetra::MatrixUtils::xpetraGidNumbering2ThyraGidNumbering(*coordinates); - H->GetLevel(0)->Set("A", Teuchos::rcp_dynamic_cast(bOp)); - H->GetLevel(0)->Set("Nullspace1", nsp1shrinked); - H->GetLevel(0)->Set("Nullspace2", nsp2shrinked); + RCP nsp1shrinked = Xpetra::MatrixUtils::xpetraGidNumbering2ThyraGidNumbering(*nullspace1); + RCP nsp2shrinked = Xpetra::MatrixUtils::xpetraGidNumbering2ThyraGidNumbering(*nullspace2); + RCP coordsshrinked = Xpetra::MatrixUtils::xpetraGidNumbering2ThyraGidNumbering(*coordinates); + H->GetLevel(0)->Set("A", Teuchos::rcp_dynamic_cast(bOp)); + H->GetLevel(0)->Set("Nullspace1", nsp1shrinked); + H->GetLevel(0)->Set("Nullspace2", nsp2shrinked); H->GetLevel(0)->Set("Coordinates", coordsshrinked); // TODO split coordinates for rebalancing! (or provide the full vector in the right map and split it in the factories!) 
- if(mySpecialMap!=Teuchos::null) { - RCP specialmapshrinked = Xpetra::MapUtils::shrinkMapGIDs(*mySpecialMap,*mySpecialMap); + if (mySpecialMap != Teuchos::null) { + RCP specialmapshrinked = Xpetra::MapUtils::shrinkMapGIDs(*mySpecialMap, *mySpecialMap); H->GetLevel(0)->Set("map SpecialMap", Teuchos::rcp_const_cast(specialmapshrinked)); } // rearrange contents of rhs vector B RCP thy_map_extractor = bOp->getRangeMapExtractor(); - RCP Bshrinked = Xpetra::MultiVectorFactory::Build(thy_map_extractor->getFullMap(),B->getNumVectors(),true); + RCP Bshrinked = Xpetra::MultiVectorFactory::Build(thy_map_extractor->getFullMap(), B->getNumVectors(), true); size_t numMaps = map_extractor->NumMaps(); - for(size_t k = 0; k < numMaps; k++) { - RCP partVec = map_extractor->ExtractVector(B, k); - RCP newPartVec = thy_map_extractor->getVector(k, B->getNumVectors(), false); // we need real GIDs (not zero based Thyra GIDs) + for (size_t k = 0; k < numMaps; k++) { + RCP partVec = map_extractor->ExtractVector(B, k); + RCP newPartVec = thy_map_extractor->getVector(k, B->getNumVectors(), false); // we need real GIDs (not zero based Thyra GIDs) // copy data for (size_t c = 0; c < partVec->getNumVectors(); c++) { - Teuchos::ArrayRCP< const Scalar > data = partVec->getData(c); + Teuchos::ArrayRCP data = partVec->getData(c); for (size_t r = 0; r < partVec->getLocalLength(); r++) { newPartVec->replaceLocalValue(Teuchos::as(r), c, data[r]); } @@ -596,17 +607,17 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } B = Bshrinked; - X = VectorFactory::Build(thy_map_extractor->getFullMap(),1); + X = VectorFactory::Build(thy_map_extractor->getFullMap(), 1); X->putScalar(zero); // TODO the ordering of the solution vector X is different! 
} } else { // standard (non-blocked) case - H->GetLevel(0)->Set("A", A); - H->GetLevel(0)->Set("Nullspace", nullspace); + H->GetLevel(0)->Set("A", A); + H->GetLevel(0)->Set("Nullspace", nullspace); H->GetLevel(0)->Set("Coordinates", coordinates); - if(mySpecialMap!=Teuchos::null) H->GetLevel(0)->Set("map SpecialMap", mySpecialMap); + if (mySpecialMap != Teuchos::null) H->GetLevel(0)->Set("map SpecialMap", mySpecialMap); } comm->barrier(); @@ -632,31 +643,31 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg comm->barrier(); if (amgAsSolver) { - tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3 - Fixed Point Solve"))); + tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3 - Fixed Point Solve"))); H->IsPreconditioner(false); Teuchos::Array norms(1); - norms = Utilities::ResidualNorm(*A,*X,*B); + norms = Utilities::ResidualNorm(*A, *X, *B); std::cout << " iter: 0 residual = " << norms[0] << std::endl; - for (int i=0; i< maxIts; ++i) { + for (int i = 0; i < maxIts; ++i) { H->Iterate(*B, *X); - norms = Utilities::ResidualNorm(*A,*X,*B); - std::cout << " iter: " << i+1 << " residual = " << norms[0] << std::endl; + norms = Utilities::ResidualNorm(*A, *X, *B); + std::cout << " iter: " << i + 1 << " residual = " << norms[0] << std::endl; } } else if (amgAsPrecond) { #ifdef HAVE_MUELU_BELOS tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 4 - Belos Solve"))); // Operator and Multivector type that will be used with Belos - typedef MultiVector MV; + typedef MultiVector MV; typedef Belos::OperatorT OP; H->IsPreconditioner(true); // Define Operator and Preconditioner - Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos operator - Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator + Teuchos::RCP belosOp = Teuchos::rcp(new Belos::XpetraOp(A)); // Turns a Xpetra::Matrix object into a Belos 
operator + Teuchos::RCP belosPrec = Teuchos::rcp(new Belos::MueLuOp(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object - RCP< Belos::LinearProblem > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, B)); belosProblem->setRightPrec(belosPrec); bool set = belosProblem->setProblem(); if (set == false) { @@ -666,15 +677,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Belos parameter list Teuchos::ParameterList belosList; - if(belosFileName == "") { - belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed - belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested - belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); - belosList.set("Output Frequency", output); - belosList.set("Output Style", Belos::Brief); + if (belosFileName == "") { + belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed + belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested + belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); + belosList.set("Output Frequency", output); + belosList.set("Output Style", Belos::Brief); if (convType == "none") { - belosList.set("Explicit Residual Scaling", "None"); - belosList.set("Implicit Residual Scaling", "None"); + belosList.set("Explicit Residual Scaling", "None"); + belosList.set("Implicit Residual Scaling", "None"); } } else { Teuchos::updateParametersFromXmlFileAndBroadcast(belosFileName, Teuchos::Ptr(&belosList), *comm); @@ -711,7 +722,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } // Create an iterative solver manager - RCP< Belos::SolverManager > solver; + RCP > solver; if (krylovMethod == "cg") { solver = rcp(new Belos::BlockCGSolMgr(belosProblem, 
rcp(&belosList, false))); } else if (krylovMethod == "gmres") { @@ -728,22 +739,25 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Get the number of iterations for this solve. fancyout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; - } catch(...) { - fancyout << std::endl << "ERROR: Belos threw an error! " << std::endl; + } catch (...) { + fancyout << std::endl + << "ERROR: Belos threw an error! " << std::endl; } // Check convergence if (ret != Belos::Converged) - fancyout << std::endl << "ERROR: Belos did not converge! " << std::endl; + fancyout << std::endl + << "ERROR: Belos did not converge! " << std::endl; else - fancyout << std::endl << "SUCCESS: Belos converged!" << std::endl; -#endif //ifdef HAVE_MUELU_BELOS + fancyout << std::endl + << "SUCCESS: Belos converged!" << std::endl; +#endif // ifdef HAVE_MUELU_BELOS } comm->barrier(); tm = Teuchos::null; if (strOutputFilename.empty() == false) { tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 5 - Export solution in VTK"))); - if(comm->getSize() > 1) { + if (comm->getSize() > 1) { std::stringstream ss; ss << "-proc" << comm->getRank(); strOutputFilename.append(ss.str()); @@ -751,19 +765,19 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg strOutputFilename.append(".vtu"); std::ofstream fout(strOutputFilename); - ExportVTK expVTK; - expVTK.writeFile(fout,coordinates,X); + ExportVTK expVTK; + expVTK.writeFile(fout, coordinates, X); fout.close(); size_t start_pos = strOutputFilename.find(".vtu"); strOutputFilename.replace(start_pos, 4, ".m"); - Xpetra::IO::Write(strOutputFilename,*X); + Xpetra::IO::Write(strOutputFilename, *X); } // print timings comm->barrier(); - tm = Teuchos::null; + tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings) { @@ -775,17 +789,13 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg } 
TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); - return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); -} //main - + return (success ? EXIT_SUCCESS : EXIT_FAILURE); +} // main //- -- -------------------------------------------------------- #define MUELU_AUTOMATIC_TEST_ETI_NAME main_ #include "MueLu_Test_ETI.hpp" -int main(int argc, char *argv[]) { - return Automatic_Test_ETI(argc,argv); +int main(int argc, char* argv[]) { + return Automatic_Test_ETI(argc, argv); } - - - diff --git a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp index 6b38a9f96750..588762b95df3 100644 --- a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp @@ -70,105 +70,103 @@ /*! @class BrickAggregationFactory @brief Aggregation method for generating "brick" aggregates. It also does "hotdogs" and "pancakes." - + This factory can generate aggregates of size 1, 2 or 3 in each dimension, in any combination. */ namespace MueLu { - template - class BrickAggregationFactory : public SingleLevelFactoryBase { +template +class BrickAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_BRICKAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - private: - typedef Teuchos::ScalarTraits STS; - - // Comparator for doubles - // Generally, the coordinates for coarser levels would come out of averaging of fine level coordinates - // It is possible that the result of the averaging differs slightly between clusters, as we might have - // 3x2 and 2x2 cluster which would result in averaging 6 and 4 y-coordinates respectively, leading to - // slightly different results. - // Therefore, we hardcode a constant so that close points are considered the same. 
- class compare { - public: - bool operator()(const Scalar& x, const Scalar& y) const { - if (STS::magnitude(x - y) < 1e-14) - return false; - return STS::real(x) < STS::real(y); - } - }; - typedef std::map container; + private: + typedef Teuchos::ScalarTraits STS; - public: - //! @name Constructors/Destructors. - //@{ + // Comparator for doubles + // Generally, the coordinates for coarser levels would come out of averaging of fine level coordinates + // It is possible that the result of the averaging differs slightly between clusters, as we might have + // 3x2 and 2x2 cluster which would result in averaging 6 and 4 y-coordinates respectively, leading to + // slightly different results. + // Therefore, we hardcode a constant so that close points are considered the same. + class compare { + public: + bool operator()(const Scalar& x, const Scalar& y) const { + if (STS::magnitude(x - y) < 1e-14) + return false; + return STS::real(x) < STS::real(y); + } + }; + typedef std::map container; - //! Constructor. - BrickAggregationFactory() : nDim_(-1), nx_(-1), ny_(-1), nz_(-1), bx_(-1), by_(-1), bz_(-1) { }; + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~BrickAggregationFactory() { } + //! Constructor. + BrickAggregationFactory() + : nDim_(-1) + , nx_(-1) + , ny_(-1) + , nz_(-1) + , bx_(-1) + , by_(-1) + , bz_(-1){}; - RCP GetValidParameterList() const; + //! Destructor. + virtual ~BrickAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - // Options shared by all aggregation algorithms + //@} - //! Input - //@{ + // Options shared by all aggregation algorithms - void DeclareInput(Level ¤tLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + //! @name Build methods. + //@{ - //@} + /*! @brief Build aggregates. 
*/ + void Build(Level& currentLevel) const; - private: - void Setup(const RCP >& comm, const RCP::magnitudeType,LO,GO,NO> >& coords, const RCP& map) const; - RCP Construct1DMap(const RCP >& comm, const ArrayRCP::magnitudeType>& x) const; + //@} - void BuildGraph(Level& currentLevel, const RCP& A) const; + private: + void Setup(const RCP >& comm, const RCP::magnitudeType, LO, GO, NO> >& coords, const RCP& map) const; + RCP Construct1DMap(const RCP >& comm, const ArrayRCP::magnitudeType>& x) const; + void BuildGraph(Level& currentLevel, const RCP& A) const; - bool isDirichlet(LocalOrdinal LID) const; - bool isRoot (LocalOrdinal LID) const; - GlobalOrdinal getRoot (LocalOrdinal LID) const; - GlobalOrdinal getAggGID(LocalOrdinal LID) const; + bool isDirichlet(LocalOrdinal LID) const; + bool isRoot(LocalOrdinal LID) const; + GlobalOrdinal getRoot(LocalOrdinal LID) const; + GlobalOrdinal getAggGID(LocalOrdinal LID) const; - void getIJK(LocalOrdinal LID, int &i, int &j, int &k) const; - void getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const; + void getIJK(LocalOrdinal LID, int& i, int& j, int& k) const; + void getAggIJK(LocalOrdinal LID, int& i, int& j, int& k) const; - mutable - int nDim_; - mutable - RCP xMap_, yMap_, zMap_; - mutable - ArrayRCP::magnitudeType> x_, y_, z_; - mutable - int nx_, ny_, nz_; - mutable - int bx_, by_, bz_; - mutable - bool dirichletX_,dirichletY_,dirichletZ_; - mutable - int naggx_, naggy_, naggz_; + mutable int nDim_; + mutable RCP xMap_, yMap_, zMap_; + mutable ArrayRCP::magnitudeType> x_, y_, z_; + mutable int nx_, ny_, nz_; + mutable int bx_, by_, bz_; + mutable bool dirichletX_, dirichletY_, dirichletZ_; + mutable int naggx_, naggy_, naggz_; - mutable - std::map revMap_; - }; // class BrickAggregationFactory + mutable std::map revMap_; +}; // class BrickAggregationFactory -} +} // namespace MueLu #define MUELU_BRICKAGGREGATIONFACTORY_SHORT #endif /* MUELU_BRICKAGGREGATIONFACTORY_DECL_HPP_ */ diff --git 
a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp index aa6863becd89..7b7ab9f9e07e 100644 --- a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp @@ -70,518 +70,506 @@ #include "MueLu_Graph.hpp" #include "MueLu_LWGraph.hpp" - namespace MueLu { - template - RCP BrickAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP BrickAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: brick x size"); - SET_VALID_ENTRY("aggregation: brick y size"); - SET_VALID_ENTRY("aggregation: brick z size"); - SET_VALID_ENTRY("aggregation: brick x Dirichlet"); - SET_VALID_ENTRY("aggregation: brick y Dirichlet"); - SET_VALID_ENTRY("aggregation: brick z Dirichlet"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory for matrix"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for coordinates"); - return validParamList; + SET_VALID_ENTRY("aggregation: brick x size"); + SET_VALID_ENTRY("aggregation: brick y size"); + SET_VALID_ENTRY("aggregation: brick z size"); + SET_VALID_ENTRY("aggregation: brick x Dirichlet"); + SET_VALID_ENTRY("aggregation: brick y Dirichlet"); + SET_VALID_ENTRY("aggregation: brick z Dirichlet"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory for matrix"); + validParamList->set >("Coordinates", Teuchos::null, "Generating factory for coordinates"); + return validParamList; +} + +template +void BrickAggregationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + 
Input(currentLevel, "Coordinates"); +} + +// The current implementation cannot deal with bricks larger than 3x3(x3) in +// parallel. The reason is that aggregation infrastructure in place has +// major drawbacks. +// +// Aggregates class is constructed with a help of a provided map, either +// taken from a graph, or provided directly. This map is usually taken to be +// a column map of a matrix. The reason for that is that if we have an +// overlapped aggregation, we want the processor owning aggregates to store +// agg id for all nodes in this aggregate. If we used row map, there would +// be no way for the processor to know whether there are some other nodes on +// a different processor which belong to its aggregate. On the other hand, +// using column map allows both vertex2AggId and procWinner arrays in +// Aggregates class to store some extra data, such as whether nodes belonging +// to a different processor belong to this processor aggregate. +// +// The drawback of this is that it stores only overlap=1 data. For aggressive +// coarsening, such a brick aggregation with a large single dimension of +// brick, it could happen that we need to know depth two or more extra nodes +// in the other processor subdomain. +// +// Another issue is that we may have some implicit connection between +// aggregate map and maps of A used in the construction of a tentative +// prolongator. +// +// Another issue is that it seems that some info is unused or not required. +// Specifically, it seems that if a node belongs to an aggregate on a +// different processor, we don't actually need to set vertex2AggId and +// procWinner, despite the following comment in +// Aggregates decl: +// vertex2AggId[k] gives a local id +// corresponding to the aggregate to which +// local id k has been assigned. While k +// is the local id on my processor (MyPID) +// vertex2AggId[k] is the local id on the +// processor which actually owns the +// aggregate. 
This owning processor has id +// given by procWinner[k]. +// It is possible that that info is only used during arbitration in +// CoupledAggregationFactory. +// +// The steps that we need to do to resolve this issue: +// - Break the link between maps in TentativePFactory, allowing any maps in Aggregates +// - Allow Aggregates to construct their own maps, if necessary, OR +// - construct aggregates based on row map +template +void BrickAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> MultiVector_d; + + const ParameterList& pL = GetParameterList(); + RCP coords = Get >(currentLevel, "Coordinates"); + RCP A = Get >(currentLevel, "A"); + RCP rowMap = A->getRowMap(); + RCP colMap = A->getColMap(); + GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); + + RCP > comm = rowMap->getComm(); + int numProcs = comm->getSize(); + int myRank = comm->getRank(); + + int numPoints = colMap->getLocalNumElements(); + + bx_ = pL.get("aggregation: brick x size"); + by_ = pL.get("aggregation: brick y size"); + bz_ = pL.get("aggregation: brick z size"); + + dirichletX_ = pL.get("aggregation: brick x Dirichlet"); + dirichletY_ = pL.get("aggregation: brick y Dirichlet"); + dirichletZ_ = pL.get("aggregation: brick z Dirichlet"); + if (dirichletX_) GetOStream(Runtime0) << "Dirichlet boundaries in the x direction" << std::endl; + if (dirichletY_) GetOStream(Runtime0) << "Dirichlet boundaries in the y direction" << std::endl; + if (dirichletZ_) GetOStream(Runtime0) << "Dirichlet boundaries in the z direction" << std::endl; + + if (numProcs > 1) { + // TODO: deal with block size > 1 (see comments above) + // TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, Exceptions::RuntimeError, "Currently cannot deal with brick size > 3"); } - template - void BrickAggregationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, 
"Coordinates"); + RCP overlappedCoords = coords; + RCP importer = ImportFactory::Build(coords->getMap(), colMap); + if (!importer.is_null()) { + overlappedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(colMap, coords->getNumVectors()); + overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT); } - // The current implementation cannot deal with bricks larger than 3x3(x3) in - // parallel. The reason is that aggregation infrastructure in place has - // major drawbacks. - // - // Aggregates class is constructed with a help of a provided map, either - // taken from a graph, or provided directly. This map is usually taken to be - // a column map of a matrix. The reason for that is that if we have an - // overlapped aggregation, we want the processor owning aggregates to store - // agg id for all nodes in this aggregate. If we used row map, there would - // be no way for the processor to know whether there are some other nodes on - // a different processor which belong to its aggregate. On the other hand, - // using column map allows both vertex2AggId and procWinner arrays in - // Aggregates class to store some extra data, such as whether nodes belonging - // to a different processor belong to this processor aggregate. - // - // The drawback of this is that it stores only overlap=1 data. For aggressive - // coarsening, such a brick aggregation with a large single dimension of - // brick, it could happen that we need to know depth two or more extra nodes - // in the other processor subdomain. - // - // Another issue is that we may have some implicit connection between - // aggregate map and maps of A used in the construction of a tentative - // prolongator. - // - // Another issue is that it seems that some info is unused or not required. 
- // Specifically, it seems that if a node belongs to an aggregate on a - // different processor, we don't actually need to set vertex2AggId and - // procWinner, despite the following comment in - // Aggregates decl: - // vertex2AggId[k] gives a local id - // corresponding to the aggregate to which - // local id k has been assigned. While k - // is the local id on my processor (MyPID) - // vertex2AggId[k] is the local id on the - // processor which actually owns the - // aggregate. This owning processor has id - // given by procWinner[k]. - // It is possible that that info is only used during arbitration in - // CoupledAggregationFactory. - // - // The steps that we need to do to resolve this issue: - // - Break the link between maps in TentativePFactory, allowing any maps in Aggregates - // - Allow Aggregates to construct their own maps, if necessary, OR - // - construct aggregates based on row map - template - void BrickAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> MultiVector_d; - - const ParameterList& pL = GetParameterList(); - RCP coords = Get >(currentLevel, "Coordinates"); - RCP A = Get< RCP > (currentLevel, "A"); - RCP rowMap = A->getRowMap(); - RCP colMap = A->getColMap(); - GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); - - RCP > comm = rowMap->getComm(); - int numProcs = comm->getSize(); - int myRank = comm->getRank(); - - int numPoints = colMap->getLocalNumElements(); - - bx_ = pL.get("aggregation: brick x size"); - by_ = pL.get("aggregation: brick y size"); - bz_ = pL.get("aggregation: brick z size"); - - dirichletX_ = pL.get("aggregation: brick x Dirichlet"); - dirichletY_ = pL.get("aggregation: brick y Dirichlet"); - dirichletZ_ = pL.get("aggregation: brick z Dirichlet"); - if(dirichletX_) GetOStream(Runtime0) << "Dirichlet boundaries in the x direction"< 1) { - // TODO: deal with block size > 1 (see comments above) - 
//TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, Exceptions::RuntimeError, "Currently cannot deal with brick size > 3"); - } - - RCP overlappedCoords = coords; - RCP importer = ImportFactory::Build(coords->getMap(), colMap); - if (!importer.is_null()) { - overlappedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(colMap, coords->getNumVectors()); - overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT); - } - - // Setup misc structures - // Logically, we construct enough data to query topological information of a rectangular grid - Setup(comm, overlappedCoords, colMap); - - GetOStream(Runtime0) << "Using brick size: " << bx_ - << (nDim_ > 1 ? "x " + toString(by_) : "") - << (nDim_ > 2 ? "x " + toString(bz_) : "") << std::endl; - - // Build the graph - BuildGraph(currentLevel,A); - - // Construct aggregates - RCP aggregates = rcp(new Aggregates(colMap)); - aggregates->setObjectLabel("Brick"); - - ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - - // In the first pass, we set a mapping from a vertex to aggregate global id. We deal with a structured - // rectangular mesh, therefore we know the structure of aggregates. For each vertex we can tell exactly - // which aggregate it belongs to. - // If we determine that the aggregate does not belong to us (i.e. the root vertex does not belong to this - // processor, or is outside and we lost "" arbitration), we record the global aggregate id in order to - // fetch the local info from the processor owning the aggregate. This is required for aggregates, as it - // uses the local aggregate ids of the owning processor. - std::set myAggGIDs, remoteAggGIDs; - for (LO LID = 0; LID < numPoints; LID++) { - GO aggGID = getAggGID(LID); - // printf("[%d] (%d,%d,%d) => agg %d\n",LID,(int)(*xMap_)[x_[LID]],nDim_ > 1 ? (int)(*yMap_)[y_[LID]] : -1,nDim_ > 2 ? 
(int)(*zMap_)[z_[LID]] : -1,(int)aggGID); - if(aggGID == GO_INVALID) continue; - // printf("[%d] getRoot = %d\n",(int)LID,(int)getRoot(LID)); - - if ((revMap_.find(getRoot(LID)) != revMap_.end()) && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { - // Root of the brick aggregate containing GID (<- LID) belongs to us - vertex2AggId[LID] = aggGID; - myAggGIDs.insert(aggGID); - - if (isRoot(LID)) - aggregates->SetIsRoot(LID); - // printf("[%d] initial vertex2AggId = %d\n",(int)LID,(int)vertex2AggId[LID]); - } else { - remoteAggGIDs.insert(aggGID); - } - } - size_t numAggregates = myAggGIDs .size(); - size_t numRemote = remoteAggGIDs.size(); - aggregates->SetNumAggregates(numAggregates); - - std::map AggG2L; // Map: Agg GID -> Agg LID (possibly on a different processor) - std::map AggG2R; // Map: Agg GID -> processor rank owning aggregate - - Array myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote); - - // Fill in the maps for aggregates that we own - size_t ind = 0; - for (typename std::set::const_iterator it = myAggGIDs.begin(); it != myAggGIDs.end(); it++) { - AggG2L[*it] = ind; - AggG2R[*it] = myRank; - - myAggGIDsArray[ind++] = *it; - } - - // The map is a convenient way to fetch remote local indices from global indices. 
- RCP aggMap = MapFactory::Build(rowMap->lib(), Teuchos::OrdinalTraits::invalid(), - myAggGIDsArray, 0, comm); - - ind = 0; - for (typename std::set::const_iterator it = remoteAggGIDs.begin(); it != remoteAggGIDs.end(); it++) - remoteAggGIDsArray[ind++] = *it; - - // Fetch the required aggregate local ids and ranks - Array remoteProcIDs(numRemote); - Array remoteLIDs (numRemote); - aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs); - - // Fill in the maps for aggregates that we don't own but which have some of our vertices - for (size_t i = 0; i < numRemote; i++) { - AggG2L[remoteAggGIDsArray[i]] = remoteLIDs [i]; - AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i]; + // Setup misc structures + // Logically, we construct enough data to query topological information of a rectangular grid + Setup(comm, overlappedCoords, colMap); + + GetOStream(Runtime0) << "Using brick size: " << bx_ + << (nDim_ > 1 ? "x " + toString(by_) : "") + << (nDim_ > 2 ? "x " + toString(bz_) : "") << std::endl; + + // Build the graph + BuildGraph(currentLevel, A); + + // Construct aggregates + RCP aggregates = rcp(new Aggregates(colMap)); + aggregates->setObjectLabel("Brick"); + + ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + + // In the first pass, we set a mapping from a vertex to aggregate global id. We deal with a structured + // rectangular mesh, therefore we know the structure of aggregates. For each vertex we can tell exactly + // which aggregate it belongs to. + // If we determine that the aggregate does not belong to us (i.e. the root vertex does not belong to this + // processor, or is outside and we lost "" arbitration), we record the global aggregate id in order to + // fetch the local info from the processor owning the aggregate. This is required for aggregates, as it + // uses the local aggregate ids of the owning processor. 
+ std::set myAggGIDs, remoteAggGIDs; + for (LO LID = 0; LID < numPoints; LID++) { + GO aggGID = getAggGID(LID); + // printf("[%d] (%d,%d,%d) => agg %d\n",LID,(int)(*xMap_)[x_[LID]],nDim_ > 1 ? (int)(*yMap_)[y_[LID]] : -1,nDim_ > 2 ? (int)(*zMap_)[z_[LID]] : -1,(int)aggGID); + if (aggGID == GO_INVALID) continue; + // printf("[%d] getRoot = %d\n",(int)LID,(int)getRoot(LID)); + + if ((revMap_.find(getRoot(LID)) != revMap_.end()) && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { + // Root of the brick aggregate containing GID (<- LID) belongs to us + vertex2AggId[LID] = aggGID; + myAggGIDs.insert(aggGID); + + if (isRoot(LID)) + aggregates->SetIsRoot(LID); + // printf("[%d] initial vertex2AggId = %d\n",(int)LID,(int)vertex2AggId[LID]); + } else { + remoteAggGIDs.insert(aggGID); } - - // Remap aggregate GIDs to LIDs and set up owning processors - for (LO LID = 0; LID < numPoints; LID++) { - if (revMap_.find(getRoot(LID)) != revMap_.end() && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { - GO aggGID = vertex2AggId[LID]; - if(aggGID != MUELU_UNAGGREGATED) { - vertex2AggId[LID] = AggG2L[aggGID]; - procWinner [LID] = AggG2R[aggGID]; - } - } - } - - - GO numGlobalRemote; - MueLu_sumAll(comm, as(numRemote), numGlobalRemote); - aggregates->AggregatesCrossProcessors(numGlobalRemote); - - Set(currentLevel, "Aggregates", aggregates); - - GetOStream(Statistics1) << aggregates->description() << std::endl; } + size_t numAggregates = myAggGIDs.size(); + size_t numRemote = remoteAggGIDs.size(); + aggregates->SetNumAggregates(numAggregates); - template - void BrickAggregationFactory:: - Setup(const RCP >& comm, const RCP::magnitudeType,LO,GO,NO> >& coords, const RCP& /* map */) const { - nDim_ = coords->getNumVectors(); - - x_ = coords->getData(0); - xMap_ = Construct1DMap(comm, x_); - nx_ = xMap_->size(); + std::map AggG2L; // Map: Agg GID -> Agg LID (possibly on a different processor) + std::map AggG2R; // Map: Agg GID 
-> processor rank owning aggregate - ny_ = 1; - if (nDim_ > 1) { - y_ = coords->getData(1); - yMap_ = Construct1DMap(comm, y_); - ny_ = yMap_->size(); - } + Array myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote); - nz_ = 1; - if (nDim_ > 2) { - z_ = coords->getData(2); - zMap_ = Construct1DMap(comm, z_); - nz_ = zMap_->size(); - } + // Fill in the maps for aggregates that we own + size_t ind = 0; + for (typename std::set::const_iterator it = myAggGIDs.begin(); it != myAggGIDs.end(); it++) { + AggG2L[*it] = ind; + AggG2R[*it] = myRank; - for (size_t ind = 0; ind < coords->getLocalLength(); ind++) { - GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0; - if (nDim_ > 1) - j = (*yMap_)[(coords->getData(1))[ind]]; - if (nDim_ > 2) - k = (*zMap_)[(coords->getData(2))[ind]]; - - revMap_[k*ny_*nx_ + j*nx_ + i] = ind; - } + myAggGIDsArray[ind++] = *it; + } - - // Get the number of aggregates in each direction, correcting for Dirichlet - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - naggx_ = (nx_-2*xboost)/bx_ + ((nx_-2*xboost) % bx_ ? 1 : 0); + // The map is a convenient way to fetch remote local indices from global indices. + RCP aggMap = MapFactory::Build(rowMap->lib(), Teuchos::OrdinalTraits::invalid(), + myAggGIDsArray, 0, comm); - if(nDim_ > 1) - naggy_ = (ny_-2*yboost)/by_ + ( (ny_-2*yboost) % by_ ? 1 : 0); - else - naggy_ = 1; + ind = 0; + for (typename std::set::const_iterator it = remoteAggGIDs.begin(); it != remoteAggGIDs.end(); it++) + remoteAggGIDsArray[ind++] = *it; - if(nDim_ > 2) - naggz_ = (nz_-2*zboost)/bz_ + ( (nz_-2*zboost) % bz_ ? 
1 : 0); - else - naggz_ = 1; + // Fetch the required aggregate local ids and ranks + Array remoteProcIDs(numRemote); + Array remoteLIDs(numRemote); + aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs); + // Fill in the maps for aggregates that we don't own but which have some of our vertices + for (size_t i = 0; i < numRemote; i++) { + AggG2L[remoteAggGIDsArray[i]] = remoteLIDs[i]; + AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i]; } - template - RCP::container> - BrickAggregationFactory:: - Construct1DMap (const RCP >& comm, - const ArrayRCP::magnitudeType>& x) const - { - int n = x.size(); - - // Step 1: Create a local vector with unique coordinate points - RCP gMap = rcp(new container); - for (int i = 0; i < n; i++) - (*gMap)[x[i]] = 0; - -#ifdef HAVE_MPI - // Step 2: exchange coordinates - // NOTE: we assume the coordinates are double, or double compatible - // That means that for complex case, we assume that all imaginary parts are zeros - int numProcs = comm->getSize(); - if (numProcs > 1) { - RCP > dupMpiComm = rcp_dynamic_cast >(comm->duplicate()); - - MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())(); - - int sendCnt = gMap->size(), cnt = 0, recvSize; - Array recvCnt(numProcs), Displs(numProcs); - Array sendBuf, recvBuf; - - sendBuf.resize(sendCnt); - for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++) - sendBuf[cnt++] = Teuchos::as(STS::real(cit->first)); - - MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm); - Displs[0] = 0; - for (int i = 0; i < numProcs-1; i++) - Displs[i+1] = Displs[i] + recvCnt[i]; - recvSize = Displs[numProcs-1] + recvCnt[numProcs-1]; - recvBuf.resize(recvSize); - MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm); - - for (int i = 0; i < recvSize; i++) - (*gMap)[as(recvBuf[i])] = 0; + // Remap aggregate GIDs to LIDs and set up owning processors + for (LO 
LID = 0; LID < numPoints; LID++) { + if (revMap_.find(getRoot(LID)) != revMap_.end() && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { + GO aggGID = vertex2AggId[LID]; + if (aggGID != MUELU_UNAGGREGATED) { + vertex2AggId[LID] = AggG2L[aggGID]; + procWinner[LID] = AggG2R[aggGID]; + } } -#endif - - GO cnt = 0; - for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++) - it->second = cnt++; - - return gMap; } - template - bool BrickAggregationFactory::isRoot(LocalOrdinal LID) const { - int i,j,k; - getIJK(LID,i,j,k); + GO numGlobalRemote; + MueLu_sumAll(comm, as(numRemote), numGlobalRemote); + aggregates->AggregatesCrossProcessors(numGlobalRemote); - return (k*ny_*nx_ + j*nx_ + i) == getRoot(LID); - } - - template - bool BrickAggregationFactory::isDirichlet(LocalOrdinal LID) const { - bool boundary = false; - int i,j,k; - getIJK(LID,i,j,k); - if( dirichletX_ && (i == 0 || i == nx_-1) ) - boundary = true; - if(nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_-1) ) - boundary = true; - if(nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_-1) ) - boundary = true; - - return boundary; - } + Set(currentLevel, "Aggregates", aggregates); + GetOStream(Statistics1) << aggregates->description() << std::endl; +} - template - GlobalOrdinal BrickAggregationFactory::getRoot(LocalOrdinal LID) const { - if(isDirichlet(LID)) - return Teuchos::OrdinalTraits::invalid(); +template +void BrickAggregationFactory:: + Setup(const RCP >& comm, const RCP::magnitudeType, LO, GO, NO> >& coords, const RCP& /* map */) const { + nDim_ = coords->getNumVectors(); - int aggI,aggJ,aggK; - getAggIJK(LID,aggI,aggJ,aggK); - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - - int i = xboost + aggI*bx_ + (bx_-1)/2; - int j = (nDim_>1) ? yboost + aggJ*by_ + (by_-1)/2 : 0; - int k = (nDim_>2) ? 
zboost + aggK*bz_ + (bz_-1)/2 : 0; + x_ = coords->getData(0); + xMap_ = Construct1DMap(comm, x_); + nx_ = xMap_->size(); - return k*ny_*nx_ + j*nx_ + i; + ny_ = 1; + if (nDim_ > 1) { + y_ = coords->getData(1); + yMap_ = Construct1DMap(comm, y_); + ny_ = yMap_->size(); } - template - void BrickAggregationFactory::getIJK(LocalOrdinal LID, int &i, int &j, int &k) const { - i = (*xMap_)[x_[LID]]; - j = (nDim_>1) ? (*yMap_)[y_[LID]] : 0; - k = (nDim_>2) ? (*zMap_)[z_[LID]] : 0; + nz_ = 1; + if (nDim_ > 2) { + z_ = coords->getData(2); + zMap_ = Construct1DMap(comm, z_); + nz_ = zMap_->size(); } + for (size_t ind = 0; ind < coords->getLocalLength(); ind++) { + GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0; + if (nDim_ > 1) + j = (*yMap_)[(coords->getData(1))[ind]]; + if (nDim_ > 2) + k = (*zMap_)[(coords->getData(2))[ind]]; - template - void BrickAggregationFactory::getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const { - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 
1 : 0; - int pointI, pointJ, pointK; - getIJK(LID,pointI,pointJ,pointK); - i = (pointI-xboost)/bx_; - - if (nDim_ > 1) j = (pointJ-yboost)/by_; - else j = 0; - - if (nDim_ > 2) k = (pointK-zboost)/bz_; - else k = 0; + revMap_[k * ny_ * nx_ + j * nx_ + i] = ind; } - template - GlobalOrdinal BrickAggregationFactory::getAggGID(LocalOrdinal LID) const { - bool boundary = false; - - int i, j, k; - getIJK(LID,i,j,k); - int ii , jj, kk; - getAggIJK(LID,ii,jj,kk); - - if( dirichletX_ && (i == 0 || i == nx_ - 1)) boundary = true; - if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) boundary = true; - if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) boundary = true; - - /* - if(boundary) - printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %s\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,"BOUNDARY"); - else - printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %d\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,kk*naggy_*naggx_ + jj*naggx_ + ii); - */ - - if (boundary) - return Teuchos::OrdinalTraits::invalid(); - else - return Teuchos::as(kk*naggy_*naggx_) + Teuchos::as(jj*naggx_) + ii; - - } + // Get the number of aggregates in each direction, correcting for Dirichlet + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + naggx_ = (nx_ - 2 * xboost) / bx_ + ((nx_ - 2 * xboost) % bx_ ? 1 : 0); + + if (nDim_ > 1) + naggy_ = (ny_ - 2 * yboost) / by_ + ((ny_ - 2 * yboost) % by_ ? 1 : 0); + else + naggy_ = 1; + + if (nDim_ > 2) + naggz_ = (nz_ - 2 * zboost) / bz_ + ((nz_ - 2 * zboost) % bz_ ? 
1 : 0); + else + naggz_ = 1; +} + +template +RCP::container> +BrickAggregationFactory:: + Construct1DMap(const RCP >& comm, + const ArrayRCP::magnitudeType>& x) const { + int n = x.size(); + + // Step 1: Create a local vector with unique coordinate points + RCP gMap = rcp(new container); + for (int i = 0; i < n; i++) + (*gMap)[x[i]] = 0; +#ifdef HAVE_MPI + // Step 2: exchange coordinates + // NOTE: we assume the coordinates are double, or double compatible + // That means that for complex case, we assume that all imaginary parts are zeros + int numProcs = comm->getSize(); + if (numProcs > 1) { + RCP > dupMpiComm = rcp_dynamic_cast >(comm->duplicate()); + + MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())(); + + int sendCnt = gMap->size(), cnt = 0, recvSize; + Array recvCnt(numProcs), Displs(numProcs); + Array sendBuf, recvBuf; + + sendBuf.resize(sendCnt); + for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++) + sendBuf[cnt++] = Teuchos::as(STS::real(cit->first)); + + MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm); + Displs[0] = 0; + for (int i = 0; i < numProcs - 1; i++) + Displs[i + 1] = Displs[i] + recvCnt[i]; + recvSize = Displs[numProcs - 1] + recvCnt[numProcs - 1]; + recvBuf.resize(recvSize); + MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm); + + for (int i = 0; i < recvSize; i++) + (*gMap)[as(recvBuf[i])] = 0; + } +#endif - template - void BrickAggregationFactory::BuildGraph(Level& currentLevel, const RCP& A) const { - // TODO: Currently only works w/ 1 DOF per node - double dirichletThreshold = 0.0; - - if(bx_ > 1 && (nDim_ <= 1 || by_ > 1) && (nDim_ <=2 || bz_>1) ) { - FactoryMonitor m(*this, "Generating Graph (trivial)", currentLevel); - /*** Case 1: Use the matrix is the graph ***/ - // Bricks are of non-trivial size in all active dimensions - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of 
A")); - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - graph->SetBoundaryNodeMap(boundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "Filtering",false); + GO cnt = 0; + for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++) + it->second = cnt++; + + return gMap; +} + +template +bool BrickAggregationFactory::isRoot(LocalOrdinal LID) const { + int i, j, k; + getIJK(LID, i, j, k); + + return (k * ny_ * nx_ + j * nx_ + i) == getRoot(LID); +} + +template +bool BrickAggregationFactory::isDirichlet(LocalOrdinal LID) const { + bool boundary = false; + int i, j, k; + getIJK(LID, i, j, k); + if (dirichletX_ && (i == 0 || i == nx_ - 1)) + boundary = true; + if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) + boundary = true; + if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) + boundary = true; + + return boundary; +} + +template +GlobalOrdinal BrickAggregationFactory::getRoot(LocalOrdinal LID) const { + if (isDirichlet(LID)) + return Teuchos::OrdinalTraits::invalid(); + + int aggI, aggJ, aggK; + getAggIJK(LID, aggI, aggJ, aggK); + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + + int i = xboost + aggI * bx_ + (bx_ - 1) / 2; + int j = (nDim_ > 1) ? yboost + aggJ * by_ + (by_ - 1) / 2 : 0; + int k = (nDim_ > 2) ? 
zboost + aggK * bz_ + (bz_ - 1) / 2 : 0; + + return k * ny_ * nx_ + j * nx_ + i; +} + +template +void BrickAggregationFactory::getIJK(LocalOrdinal LID, int& i, int& j, int& k) const { + i = (*xMap_)[x_[LID]]; + j = (nDim_ > 1) ? (*yMap_)[y_[LID]] : 0; + k = (nDim_ > 2) ? (*zMap_)[z_[LID]] : 0; +} + +template +void BrickAggregationFactory::getAggIJK(LocalOrdinal LID, int& i, int& j, int& k) const { + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + int pointI, pointJ, pointK; + getIJK(LID, pointI, pointJ, pointK); + i = (pointI - xboost) / bx_; + + if (nDim_ > 1) + j = (pointJ - yboost) / by_; + else + j = 0; + + if (nDim_ > 2) + k = (pointK - zboost) / bz_; + else + k = 0; +} + +template +GlobalOrdinal BrickAggregationFactory::getAggGID(LocalOrdinal LID) const { + bool boundary = false; + + int i, j, k; + getIJK(LID, i, j, k); + int ii, jj, kk; + getAggIJK(LID, ii, jj, kk); + + if (dirichletX_ && (i == 0 || i == nx_ - 1)) boundary = true; + if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) boundary = true; + if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) boundary = true; + + /* + if(boundary) + printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %s\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,"BOUNDARY"); + else + printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %d\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,kk*naggy_*naggx_ + jj*naggx_ + ii); + */ + + if (boundary) + return Teuchos::OrdinalTraits::invalid(); + else + return Teuchos::as(kk * naggy_ * naggx_) + Teuchos::as(jj * naggx_) + ii; +} + +template +void BrickAggregationFactory::BuildGraph(Level& currentLevel, const RCP& A) const { + // TODO: Currently only works w/ 1 DOF per node + double dirichletThreshold = 0.0; + + if (bx_ > 1 && (nDim_ <= 1 || by_ > 1) && (nDim_ <= 2 || bz_ > 1)) { + FactoryMonitor m(*this, "Generating Graph (trivial)", currentLevel); 
+ /*** Case 1: Use the matrix is the graph ***/ + // Bricks are of non-trivial size in all active dimensions + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + graph->SetBoundaryNodeMap(boundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP > comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - else { - FactoryMonitor m(*this, "Generating Graph", currentLevel); - /*** Case 2: Dropping required ***/ - // There is at least one active dimension in which we are not coarsening. - // Those connections need to be dropped - bool drop_x = (bx_ == 1); - bool drop_y = (nDim_> 1 && by_ == 1); - bool drop_z = (nDim_> 2 && bz_ == 1); - - ArrayRCP rows (A->getLocalNumRows()+1); - ArrayRCP columns(A->getLocalNumEntries()); - - size_t N = A->getRowMap()->getLocalNumElements(); - - // FIXME: Do this on the host because indexing functions are host functions - auto G = A->getLocalMatrixHost().graph; - auto rowptr = G.row_map; - auto colind = G.entries; - - int ct=0; - rows[0] = 0; - for(size_t row=0; rowgetColMap()->getLocalElement(A->getRowMap()->getGlobalElement(row)); - getIJK(row2,ir,jr,kr); - - for(size_t cidx=rowptr[row]; cidx 1 && by_ == 1); + bool drop_z = (nDim_ > 2 && bz_ == 1); + + ArrayRCP rows(A->getLocalNumRows() + 1); + ArrayRCP columns(A->getLocalNumEntries()); + + size_t N = A->getRowMap()->getLocalNumElements(); + + // FIXME: Do this on the host because indexing functions are host functions + auto G = A->getLocalMatrixHost().graph; + auto rowptr = G.row_map; + auto colind = G.entries; + + int ct = 0; + rows[0] = 0; + 
for (size_t row = 0; row < N; row++) { + // NOTE: Assumes that the first part of the colmap is the rowmap + int ir, jr, kr; + LO row2 = A->getColMap()->getLocalElement(A->getRowMap()->getGlobalElement(row)); + getIJK(row2, ir, jr, kr); + + for (size_t cidx = rowptr[row]; cidx < rowptr[row + 1]; cidx++) { + int ic, jc, kc; + LO col = colind[cidx]; + getIJK(col, ic, jc, kc); + + if ((row2 != col) && ((drop_x && ir != ic) || (drop_y && jr != jc) || (drop_z && kr != kc))) { + // Drop it + // printf("[%4d] DROP row = (%d,%d,%d) col = (%d,%d,%d)\n",(int)row,ir,jr,kr,ic,jc,kc); + } else { + // Keep it + // printf("[%4d] KEEP row = (%d,%d,%d) col = (%d,%d,%d)\n",(int)row,ir,jr,kr,ic,jc,kc); + columns[ct] = col; + ct++; } - rows[row+1] = ct; - }//end for - - RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); - - - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - graph->SetBoundaryNodeMap(boundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "Filtering",true); - }//end else - - - }//end BuildGraph - - + rows[row + 1] = ct; + } // end for + + RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); + + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + graph->SetBoundaryNodeMap(boundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO 
numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP > comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } + Set(currentLevel, "DofsPerNode", 1); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "Filtering", true); + } // end else +} // end BuildGraph -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_BRICKAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp index 056673b7b9db..b11e8584442e 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp @@ -66,20 +66,20 @@ #include "MueLu_IndexManager.hpp" #include "MueLu_IndexManager_kokkos.hpp" -#define MUELU_UNAGGREGATED -1 /* indicates that a node is unassigned to */ - /* any aggregate. */ - -#define MUELU_UNASSIGNED -1 /* indicates a vertex is not yet claimed */ - /* by a processor during aggregation. */ - /* Note, it is possible at */ - /* this stage that some processors may have*/ - /* claimed their copy of a vertex for one */ - /* of their aggregates. However, some */ - /* arbitration still needs to occur. */ - /* The corresponding procWinner[]'s remain */ - /* as MUELU_UNASSIGNED until */ - /* ArbitrateAndCommunicate() is */ - /* invoked to arbitrate. */ +#define MUELU_UNAGGREGATED -1 /* indicates that a node is unassigned to */ + /* any aggregate. */ + +#define MUELU_UNASSIGNED -1 /* indicates a vertex is not yet claimed */ + /* by a processor during aggregation. */ + /* Note, it is possible at */ + /* this stage that some processors may have*/ + /* claimed their copy of a vertex for one */ + /* of their aggregates. 
However, some */ + /* arbitration still needs to occur. */ + /* The corresponding procWinner[]'s remain */ + /* as MUELU_UNASSIGNED until */ + /* ArbitrateAndCommunicate() is */ + /* invoked to arbitrate. */ /***************************************************************************** @@ -102,275 +102,272 @@ namespace MueLu { where rows (or vertices) correspond to aggregates and colunmns (or edges) correspond to nodes. While not strictly necessary, it might be convenient. */ - template - class Aggregates; - - template - class Aggregates > : public BaseClass { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using device_type = DeviceType; - using range_type = Kokkos::RangePolicy; - using LO_view = Kokkos::View; - - using aggregates_sizes_type = Kokkos::View; - - private: - // For compatibility - typedef node_type Node; +template +class Aggregates; + +template +class Aggregates > : public BaseClass { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using device_type = DeviceType; + using range_type = Kokkos::RangePolicy; + using LO_view = Kokkos::View; + + using aggregates_sizes_type = Kokkos::View; + + private: + // For compatibility + typedef node_type Node; #undef MUELU_AGGREGATES_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - // Defining types that require the short names included above - using local_graph_type = typename LWGraph_kokkos::local_graph_type; - using colors_view_type = Kokkos::View; - - /*! @brief Standard constructor for Aggregates structure - * - * Standard constructor of aggregates takes a Graph object as parameter. 
- * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as - * the mapping of node to the owning processor id. - * - */ - Aggregates(const GraphBase & graph); - - /*! @brief Standard constructor for Aggregates structure - * - * Standard constructor of aggregates takes a LWGraph object as parameter. - * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as - * the mapping of node to the owning processor id. - * - */ - Aggregates(LWGraph_kokkos graph); - - /*! @brief Constructor for Aggregates structure - * - * This constructor takes a RCP pointer to a map which is used for the internal mappings of nodes to the (local) aggregate ids and the owning processor. - * - */ - Aggregates(const RCP& map); - - /*! @brief Destructor - * - */ - virtual ~Aggregates() { } - - //! @name Set/Get Methods for specific aggregation data - //@{ - - /*! @brief Get the index manager used by structured aggregation algorithms. - This has to be done by the aggregation factory. - */ - RCP& GetIndexManagerKokkos() { return geoDataKokkos_; } - - /*! @brief Set the index manager used by structured aggregation algorithms. - This has to be done by the aggregation factory. - */ - void SetIndexManagerKokkos(RCP & geoDataKokkos) { geoDataKokkos_ = geoDataKokkos; } - - /*! @brief Get the index manager used by various aggregation algorithms. - This has to be done by the aggregation factory. - */ - RCP& GetIndexManager() { return geoData_; } - - /*! @brief Set the index manager used by various aggregation algorithms. - This has to be done by the aggregation factory. - */ - void SetIndexManager(RCP & geoData) { geoData_ = geoData; } - - /*! @brief Get a distance 2 coloring of the underlying graph. - The coloring is computed and set during Phase1 of aggregation. - */ - colors_view_type& GetGraphColors() { return graphColors_; } - - /*! 
@brief Set a distance 2 coloring of the underlying graph. - The coloring is computed and set during Phase1 of aggregation. - */ - void SetGraphColors(colors_view_type graphColors) { graphColors_ = graphColors; } + public: + // Defining types that require the short names included above + using local_graph_type = typename LWGraph_kokkos::local_graph_type; + using colors_view_type = Kokkos::View; + + /*! @brief Standard constructor for Aggregates structure + * + * Standard constructor of aggregates takes a Graph object as parameter. + * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as + * the mapping of node to the owning processor id. + * + */ + Aggregates(const GraphBase& graph); + + /*! @brief Standard constructor for Aggregates structure + * + * Standard constructor of aggregates takes a LWGraph object as parameter. + * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as + * the mapping of node to the owning processor id. + * + */ + Aggregates(LWGraph_kokkos graph); + + /*! @brief Constructor for Aggregates structure + * + * This constructor takes a RCP pointer to a map which is used for the internal mappings of nodes to the (local) aggregate ids and the owning processor. + * + */ + Aggregates(const RCP& map); + + /*! @brief Destructor + * + */ + virtual ~Aggregates() {} + + //! @name Set/Get Methods for specific aggregation data + //@{ + + /*! @brief Get the index manager used by structured aggregation algorithms. + This has to be done by the aggregation factory. + */ + RCP& GetIndexManagerKokkos() { return geoDataKokkos_; } + + /*! @brief Set the index manager used by structured aggregation algorithms. + This has to be done by the aggregation factory. + */ + void SetIndexManagerKokkos(RCP& geoDataKokkos) { geoDataKokkos_ = geoDataKokkos; } + + /*! @brief Get the index manager used by various aggregation algorithms. 
+ This has to be done by the aggregation factory. + */ + RCP& GetIndexManager() { return geoData_; } - /*! @brief Get the number of colors needed by the distance 2 coloring. - */ - LO GetGraphNumColors() { return graphNumColors_; } + /*! @brief Set the index manager used by various aggregation algorithms. + This has to be done by the aggregation factory. + */ + void SetIndexManager(RCP& geoData) { geoData_ = geoData; } + + /*! @brief Get a distance 2 coloring of the underlying graph. + The coloring is computed and set during Phase1 of aggregation. + */ + colors_view_type& GetGraphColors() { return graphColors_; } + + /*! @brief Set a distance 2 coloring of the underlying graph. + The coloring is computed and set during Phase1 of aggregation. + */ + void SetGraphColors(colors_view_type graphColors) { graphColors_ = graphColors; } - /*! @brief Set the number of colors needed by the distance 2 coloring. - */ - void SetGraphNumColors(const LO graphNumColors) { graphNumColors_ = graphNumColors; } + /*! @brief Get the number of colors needed by the distance 2 coloring. + */ + LO GetGraphNumColors() { return graphNumColors_; } - //@} - - /*! @brief Set number of local aggregates on current processor. + /*! @brief Set the number of colors needed by the distance 2 coloring. + */ + void SetGraphNumColors(const LO graphNumColors) { graphNumColors_ = graphNumColors; } - This has to be done by the aggregation routines. - */ - void SetNumAggregates(LO nAggregates) { numAggregates_ = nAggregates; } + //@} + + /*! @brief Set number of local aggregates on current processor. - /*! @brief Set number of global aggregates on current processor. - - This has to be done by the aggregation routines. - */ - void SetNumGlobalAggregates(GO nGlobalAggregates) { numGlobalAggregates_ = nGlobalAggregates; } + This has to be done by the aggregation routines. 
+ */ + void SetNumAggregates(LO nAggregates) { numAggregates_ = nAggregates; } - ///< returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLocalAggregates? - KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const { - return numAggregates_; - } - - //! @brief Record whether aggregates include DOFs from other processes. - KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool& flag) { - aggregatesIncludeGhosts_ = flag; - } - - /*! @brief Return false if and only if no aggregates include DOFs from other processes. - - Used in construction of tentative prolongator to skip a communication phase. - */ - KOKKOS_INLINE_FUNCTION bool AggregatesCrossProcessors() const { - return aggregatesIncludeGhosts_; - } - - /*! @brief Returns a nonconstant vector that maps local node IDs to local aggregates IDs. - - For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. - */ - RCP& GetVertex2AggIdNonConst() { return vertex2AggId_; } - - /*! @brief Returns nonconstant vector that maps local node IDs to owning processor IDs. - - For local node ID i, the corresponding vector entry v[i] is the owning processor ID. - */ - RCP& GetProcWinnerNonConst() { return procWinner_; } - /*! @brief Returns constant vector that maps local node IDs to local aggregates IDs. - - For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. - */ - const RCP& GetVertex2AggId() const { return vertex2AggId_; } + /*! @brief Set number of global aggregates on current processor. - /*! @brief Returns constant vector that maps local node IDs to owning processor IDs. + This has to be done by the aggregation routines. + */ + void SetNumGlobalAggregates(GO nGlobalAggregates) { numGlobalAggregates_ = nGlobalAggregates; } - For local node ID i, the corresponding vector entry v[i] is the owning processor ID. 
- */ - const RCP& GetProcWinner() const { return procWinner_; } + ///< returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLocalAggregates? + KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const { + return numAggregates_; + } + + //! @brief Record whether aggregates include DOFs from other processes. + KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool& flag) { + aggregatesIncludeGhosts_ = flag; + } + + /*! @brief Return false if and only if no aggregates include DOFs from other processes. + + Used in construction of tentative prolongator to skip a communication phase. + */ + KOKKOS_INLINE_FUNCTION bool AggregatesCrossProcessors() const { + return aggregatesIncludeGhosts_; + } + + /*! @brief Returns a nonconstant vector that maps local node IDs to local aggregates IDs. + + For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. + */ + RCP& GetVertex2AggIdNonConst() { return vertex2AggId_; } - //! Returns true if node with given local node id is marked to be a root node - inline bool IsRoot(LO i) const { return isRoot_[i]; } + /*! @brief Returns nonconstant vector that maps local node IDs to owning processor IDs. + + For local node ID i, the corresponding vector entry v[i] is the owning processor ID. + */ + RCP& GetProcWinnerNonConst() { return procWinner_; } + /*! @brief Returns constant vector that maps local node IDs to local aggregates IDs. - /*! @brief Set root node information. + For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. + */ + const RCP& GetVertex2AggId() const { return vertex2AggId_; } - Used by aggregation methods only. - */ - inline void SetIsRoot(LO i, bool value = true) { isRoot_[i] = value; } + /*! @brief Returns constant vector that maps local node IDs to owning processor IDs. 
- const RCP GetMap() const; ///< returns (overlapping) map of aggregate/node distribution + For local node ID i, the corresponding vector entry v[i] is the owning processor ID. + */ + const RCP& GetProcWinner() const { return procWinner_; } - /*! @brief Compute sizes of aggregates + //! Returns true if node with given local node id is marked to be a root node + inline bool IsRoot(LO i) const { return isRoot_[i]; } - Returns the number of nodes in each aggregate in an array. - If the aggregate sizes are not stored internally (which is the default), they are computed and returned. - If the aggregate sizes have been stored internally, then they are *not* recomputed, but instead the - stored sizes are returned. + /*! @brief Set root node information. - @param[in] forceRecompute if true, force recomputation of the aggregate sizes. - */ - typename aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute = false) const; + Used by aggregation methods only. + */ + inline void SetIsRoot(LO i, bool value = true) { isRoot_[i] = value; } - /*! @brief Compute sizes of aggregates + const RCP GetMap() const; ///< returns (overlapping) map of aggregate/node distribution - Returns the number of nodes in each aggregate in an array. - If the aggregate sizes are not stored internally (which is the default), they are computed and returned. - If the aggregate sizes have been stored internally, then they are *not* recomputed, but instead the - stored sizes are returned. + /*! @brief Compute sizes of aggregates - @param[in] forceRecompute if true, force recomputation of the aggregate sizes. - */ - Teuchos::ArrayRCP ComputeAggregateSizesArrayRCP(bool forceRecompute = false) const; + Returns the number of nodes in each aggregate in an array. + If the aggregate sizes are not stored internally (which is the default), they are computed and returned. 
+ If the aggregate sizes have been stored internally, then they are *not* recomputed, but instead the + stored sizes are returned. - local_graph_type GetGraph() const; + @param[in] forceRecompute if true, force recomputation of the aggregate sizes. + */ + typename aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute = false) const; - /*! @brief Generates a compressed list of nodes in each aggregate, where - the entries in aggNodes[aggPtr[i]] up to aggNodes[aggPtr[i+1]-1] contain the nodes in aggregate i. - unaggregated contains the list of nodes which are, for whatever reason, not aggregated (e.g. Dirichlet) - */ - void ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const; + /*! @brief Compute sizes of aggregates - //! Get global number of aggregates - // If # of global aggregates is unknown, this method does coummunication and internally record the value - GO GetNumGlobalAggregatesComputeIfNeeded(); + Returns the number of nodes in each aggregate in an array. + If the aggregate sizes are not stored internally (which is the default), they are computed and returned. + If the aggregate sizes have been stored internally, then they are *not* recomputed, but instead the + stored sizes are returned. - //! @name Overridden from Teuchos::Describable - //@{ + @param[in] forceRecompute if true, force recomputation of the aggregate sizes. + */ + Teuchos::ArrayRCP ComputeAggregateSizesArrayRCP(bool forceRecompute = false) const; - //! Return a simple one-line description of this object. - std::string description() const; + local_graph_type GetGraph() const; - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const; + /*! 
@brief Generates a compressed list of nodes in each aggregate, where + the entries in aggNodes[aggPtr[i]] up to aggNodes[aggPtr[i+1]-1] contain the nodes in aggregate i. + unaggregated contains the list of nodes which are, for whatever reason, not aggregated (e.g. Dirichlet) + */ + void ComputeNodesInAggregate(LO_view& aggPtr, LO_view& aggNodes, LO_view& unaggregated) const; - private: - LO numAggregates_; ///< Number of aggregates on this processor - GO numGlobalAggregates_; ///< Number of global aggregates + //! Get global number of aggregates + // If # of global aggregates is unknown, this method does coummunication and internally record the value + GO GetNumGlobalAggregatesComputeIfNeeded(); - /*! vertex2AggId[k] gives a local id corresponding to the aggregate to which - * local id k has been assigned. While k is the local id on my processor (MyPID), - * vertex2AggId[k] is the local id on the processor which actually owns the aggregate. - */ - RCP vertex2AggId_; + //! @name Overridden from Teuchos::Describable + //@{ - /*! - * If k is the local id on my processor (MyPID), the owning processor has the - * id given by procWinner[k] - */ - RCP procWinner_; + //! Return a simple one-line description of this object. + std::string description() const; - /*! geoData stores an index manager object that is used to perform structured aggreation - * on a problem. - */ - RCP geoDataKokkos_; + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + void print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const; - /*! geoData stores an index manager object that is used to perform structured aggreation - * on a problem. - */ - RCP geoData_; + private: + LO numAggregates_; ///< Number of aggregates on this processor + GO numGlobalAggregates_; ///< Number of global aggregates - /*! 
graphColors_ stores a view that assigns a color to each node in the graph - * These colors are used to parallelize the aggregation process in UncoupledAggregation - */ - colors_view_type graphColors_; + /*! vertex2AggId[k] gives a local id corresponding to the aggregate to which + * local id k has been assigned. While k is the local id on my processor (MyPID), + * vertex2AggId[k] is the local id on the processor which actually owns the aggregate. + */ + RCP vertex2AggId_; - /*! graphNumColors_ stores the number of colors that are needed to perform a distance 2 - * coloring of the underlying graph. - */ - LO graphNumColors_; + /*! + * If k is the local id on my processor (MyPID), the owning processor has the + * id given by procWinner[k] + */ + RCP procWinner_; - //! An ArrayRCP of booleans specifying if a local entry is an aggregate root. - Teuchos::ArrayRCP isRoot_; + /*! geoData stores an index manager object that is used to perform structured aggreation + * on a problem. + */ + RCP geoDataKokkos_; - //! Set to false iff aggregates do not include any DOFs belong to other processes. - bool aggregatesIncludeGhosts_; + /*! geoData stores an index manager object that is used to perform structured aggreation + * on a problem. + */ + RCP geoData_; - //! Array of sizes of each local aggregate. - mutable - aggregates_sizes_type aggregateSizes_; + /*! graphColors_ stores a view that assigns a color to each node in the graph + * These colors are used to parallelize the aggregation process in UncoupledAggregation + */ + colors_view_type graphColors_; - /*! aggragateSizesHost_ is a host copy of aggregate sizes, which - * helps slightly reduce the cost of calling ComputeAggregateSizes - * from different parts of MueLu that require such data on the host device. - */ - mutable - typename aggregates_sizes_type::HostMirror aggregateSizesHost_; + /*! graphNumColors_ stores the number of colors that are needed to perform a distance 2 + * coloring of the underlying graph. 
+ */ + LO graphNumColors_; - //! Aggregates represented as Kokkos graph type - mutable - local_graph_type graph_; - }; + //! An ArrayRCP of booleans specifying if a local entry is an aggregate root. + Teuchos::ArrayRCP isRoot_; -} //namespace MueLu + //! Set to false iff aggregates do not include any DOFs belong to other processes. + bool aggregatesIncludeGhosts_; + + //! Array of sizes of each local aggregate. + mutable aggregates_sizes_type aggregateSizes_; + + /*! aggragateSizesHost_ is a host copy of aggregate sizes, which + * helps slightly reduce the cost of calling ComputeAggregateSizes + * from different parts of MueLu that require such data on the host device. + */ + mutable + typename aggregates_sizes_type::HostMirror aggregateSizesHost_; + + //! Aggregates represented as Kokkos graph type + mutable local_graph_type graph_; +}; + +} // namespace MueLu #define MUELU_AGGREGATES_SHORT -#endif // MUELU_AGGREGATES_DECL_HPP +#endif // MUELU_AGGREGATES_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp index c9940f5524ac..aec11bce17c3 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp @@ -59,254 +59,262 @@ namespace MueLu { - template - Aggregates >::Aggregates(const GraphBase & graph) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>::Aggregates(const GraphBase& graph) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); + 
procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - Aggregates >:: - Aggregates(LWGraph_kokkos graph) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(LWGraph_kokkos graph) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - Aggregates >:: - Aggregates(const RCP& map) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(const RCP& map) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(map, 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(map, 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(map); - 
procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(map); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(map->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(map->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - typename Aggregates >::aggregates_sizes_type::const_type - Aggregates >::ComputeAggregateSizes(bool forceRecompute) const { - if (aggregateSizes_.size() && !forceRecompute) { - return aggregateSizes_; +template +typename Aggregates>::aggregates_sizes_type::const_type +Aggregates>::ComputeAggregateSizes(bool forceRecompute) const { + if (aggregateSizes_.size() && !forceRecompute) { + return aggregateSizes_; - } else { - // It is necessary to initialize this to 0 - aggregates_sizes_type aggregateSizes("aggregates", numAggregates_); + } else { + // It is necessary to initialize this to 0 + aggregates_sizes_type aggregateSizes("aggregates", numAggregates_); - int myPID = GetMap()->getComm()->getRank(); + int myPID = GetMap()->getComm()->getRank(); - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly); - typename AppendTrait::type aggregateSizesAtomic = aggregateSizes; - Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()), + typename AppendTrait::type aggregateSizesAtomic = aggregateSizes; + Kokkos::parallel_for( + "MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0, procWinner.size()), KOKKOS_LAMBDA(const LO i) { if (procWinner(i, 0) == myPID) 
aggregateSizesAtomic(vertex2AggId(i, 0))++; }); - aggregateSizes_ = aggregateSizes; - - return aggregateSizes; - } + aggregateSizes_ = aggregateSizes; + return aggregateSizes; } - - template - typename Teuchos::ArrayRCP - Aggregates >:: - ComputeAggregateSizesArrayRCP(bool forceRecompute) const { - auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute); - - // if this is the first time this is called, setup the host mirror and fill it - if(!aggregateSizesHost_.is_allocated()) { - aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes); +} + +template +typename Teuchos::ArrayRCP +Aggregates>:: + ComputeAggregateSizesArrayRCP(bool forceRecompute) const { + auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute); + + // if this is the first time this is called, setup the host mirror and fill it + if (!aggregateSizesHost_.is_allocated()) { + aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes); + Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); + } else { + // otherwise, only update if we forced a recompute + if (forceRecompute) Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); - } else { - // otherwise, only update if we forced a recompute - if(forceRecompute) - Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); - } + } - // put the data in an ArrayRCP, but do not give it ownership of the data - Teuchos::ArrayRCP aggregateSizesArrayRCP(aggregateSizesHost_.data(),0,aggregateSizesHost_.extent(0),false); + // put the data in an ArrayRCP, but do not give it ownership of the data + Teuchos::ArrayRCP aggregateSizesArrayRCP(aggregateSizesHost_.data(), 0, aggregateSizesHost_.extent(0), false); - return aggregateSizesArrayRCP; - } + return aggregateSizesArrayRCP; +} - template - typename Aggregates >::local_graph_type - Aggregates >::GetGraph() const { - using row_map_type = typename local_graph_type::row_map_type; - using entries_type = typename local_graph_type::entries_type; - using size_type = typename 
local_graph_type::size_type; +template +typename Aggregates>::local_graph_type +Aggregates>::GetGraph() const { + using row_map_type = typename local_graph_type::row_map_type; + using entries_type = typename local_graph_type::entries_type; + using size_type = typename local_graph_type::size_type; - auto numAggregates = numAggregates_; + auto numAggregates = numAggregates_; - if (static_cast(graph_.numRows()) == numAggregates) - return graph_; + if (static_cast(graph_.numRows()) == numAggregates) + return graph_; - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto sizes = ComputeAggregateSizes(); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto sizes = ComputeAggregateSizes(); - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0. - typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically + // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0. 
+ typename row_map_type::non_const_type rows("Agg_rows", numAggregates + 1); // rows(0) = 0 automatically - // parallel_scan (exclusive) - Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates), + // parallel_scan (exclusive) + Kokkos::parallel_scan( + "MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates), KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { update += sizes(i); if (final_pass) - rows(i+1) = update; + rows(i + 1) = update; }); - decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease - Kokkos::deep_copy(offsets, rows); + decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates + 1); // +1 is just for ease + Kokkos::deep_copy(offsets, rows); - int myPID = GetMap()->getComm()->getRank(); + int myPID = GetMap()->getComm()->getRank(); - size_type numNNZ; - { - Kokkos::View numNNZ_device = Kokkos::subview(rows, numAggregates); - typename Kokkos::View::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device); - Kokkos::deep_copy(numNNZ_host, numNNZ_device); - numNNZ = numNNZ_host(); - } - typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ); - size_t realnnz = 0; - Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()), + size_type numNNZ; + { + Kokkos::View numNNZ_device = Kokkos::subview(rows, numAggregates); + typename Kokkos::View::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device); + Kokkos::deep_copy(numNNZ_host, numNNZ_device); + numNNZ = numNNZ_host(); + } + typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ); + size_t realnnz = 0; + Kokkos::parallel_reduce( + "MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()), KOKKOS_LAMBDA(const LO i, size_t& nnz) { if (procWinner(i, 0) == myPID) { - 
typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type; - auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1)); + typedef typename std::remove_reference::type atomic_incr_type; + auto idx = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), atomic_incr_type(1)); cols(idx) = i; nnz++; } - }, realnnz); - TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError, - "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz"); - - graph_ = local_graph_type(cols, rows); - - return graph_; - } - - template - void - Aggregates >::ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const { - LO numAggs = GetNumAggregates(); - LO numNodes = vertex2AggId_->getLocalLength(); - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true); - LO INVALID = Teuchos::OrdinalTraits::invalid(); - - aggPtr = LO_view("aggPtr",numAggs+1); - aggNodes = LO_view("aggNodes",numNodes); - LO_view aggCurr("agg curr",numAggs+1); - - // Construct the "rowptr" and the counter - Kokkos::parallel_scan("MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0,numAggs+1), + }, + realnnz); + TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError, + "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz"); + + graph_ = local_graph_type(cols, rows); + + return graph_; +} + +template +void Aggregates>::ComputeNodesInAggregate(LO_view& aggPtr, LO_view& aggNodes, LO_view& unaggregated) const { + LO numAggs = GetNumAggregates(); + LO numNodes = vertex2AggId_->getLocalLength(); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + typename aggregates_sizes_type::const_type aggSizes = 
ComputeAggregateSizes(true); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + aggPtr = LO_view("aggPtr", numAggs + 1); + aggNodes = LO_view("aggNodes", numNodes); + LO_view aggCurr("agg curr", numAggs + 1); + + // Construct the "rowptr" and the counter + Kokkos::parallel_scan( + "MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0, numAggs + 1), KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) { LO count = 0; - if(aggIdx < numAggs) + if (aggIdx < numAggs) count = aggSizes(aggIdx); - if(final_pass) { - aggPtr(aggIdx) = aggOffset; + if (final_pass) { + aggPtr(aggIdx) = aggOffset; aggCurr(aggIdx) = aggOffset; - if(aggIdx==numAggs) - aggCurr(numAggs) = 0; // use this for counting unaggregated nodes + if (aggIdx == numAggs) + aggCurr(numAggs) = 0; // use this for counting unaggregated nodes } aggOffset += count; }); - // Preallocate unaggregated to the correct size - LO numUnaggregated = 0; - Kokkos::parallel_reduce("MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0,numNodes), - KOKKOS_LAMBDA(const LO nodeIdx, LO & count) { - if(vertex2AggId(nodeIdx,0)==INVALID) + // Preallocate unaggregated to the correct size + LO numUnaggregated = 0; + Kokkos::parallel_reduce( + "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0, numNodes), + KOKKOS_LAMBDA(const LO nodeIdx, LO& count) { + if (vertex2AggId(nodeIdx, 0) == INVALID) count++; - }, numUnaggregated); - unaggregated = LO_view("unaggregated",numUnaggregated); + }, + numUnaggregated); + unaggregated = LO_view("unaggregated", numUnaggregated); - // Stick the nodes in each aggregate's spot - Kokkos::parallel_for("MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0,numNodes), + // Stick the nodes in each aggregate's spot + Kokkos::parallel_for( + "MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0, numNodes), KOKKOS_LAMBDA(const LO nodeIdx) { - LO aggIdx = vertex2AggId(nodeIdx,0); - if(aggIdx != INVALID) { + LO aggIdx = 
vertex2AggId(nodeIdx, 0); + if (aggIdx != INVALID) { // atomic postincrement aggCurr(aggIdx) each time - aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx; + aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx; } else { // same, but using last entry of aggCurr for unaggregated nodes - unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx; + unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = nodeIdx; } }); - - } - - template - std::string Aggregates >::description() const { - if (numGlobalAggregates_ == -1) return BaseClass::description() + "{nGlobalAggregates = not computed}"; - else return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}"; +} + +template +std::string Aggregates>::description() const { + if (numGlobalAggregates_ == -1) + return BaseClass::description() + "{nGlobalAggregates = not computed}"; + else + return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}"; +} + +template +void Aggregates>::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { + MUELU_DESCRIBE; + + if (verbLevel & Statistics1) { + if (numGlobalAggregates_ == -1) + out0 << "Global number of aggregates: not computed " << std::endl; + else + out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl; } - - template - void Aggregates >::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Statistics1) { - if (numGlobalAggregates_ == -1) out0 << "Global number of aggregates: not computed " << std::endl; - else out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl; - } +} + +template +GlobalOrdinal Aggregates>::GetNumGlobalAggregatesComputeIfNeeded() { + if (numGlobalAggregates_ != -1) { + LO nAggregates = GetNumAggregates(); + GO nGlobalAggregates; + MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, 
nGlobalAggregates); + SetNumGlobalAggregates(nGlobalAggregates); } + return numGlobalAggregates_; +} - template - GlobalOrdinal Aggregates >::GetNumGlobalAggregatesComputeIfNeeded() { - - if (numGlobalAggregates_ != -1) { - LO nAggregates = GetNumAggregates(); - GO nGlobalAggregates; - MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates); - SetNumGlobalAggregates(nGlobalAggregates); - } - return numGlobalAggregates_; - } - - template - const RCP> > - Aggregates>::GetMap() const { - return vertex2AggId_->getMap(); - } +template +const RCP>> +Aggregates>::GetMap() const { + return vertex2AggId_->getMap(); +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_AGGREGATES_DEF_HPP +#endif // MUELU_AGGREGATES_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp b/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp index 462f8aef3153..eadb45c6fa36 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp @@ -46,7 +46,7 @@ #ifndef MUELU_GRAPHBASE_HPP #define MUELU_GRAPHBASE_HPP -#include // global_size_t +#include // global_size_t #include #include "MueLu_ConfigDefs.hpp" @@ -61,69 +61,68 @@ namespace MueLu { Pure virtual base class for MueLu representations of graphs. */ - template - class GraphBase - : public BaseClass { +template +class GraphBase + : public BaseClass { #undef MUELU_GRAPHBASE_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - // For Zoltan2 compatibility - using lno_t = LocalOrdinal; - using gno_t = GlobalOrdinal; - using node_t = Node; + public: + // For Zoltan2 compatibility + using lno_t = LocalOrdinal; + using gno_t = GlobalOrdinal; + using node_t = Node; - //! @name Constructors/Destructors. - //@{ - virtual ~GraphBase() {}; - //@} + //! @name Constructors/Destructors. 
+ //@{ + virtual ~GraphBase(){}; + //@} - virtual const RCP > GetComm() const = 0; - virtual const RCP GetDomainMap() const = 0; - virtual const RCP GetImportMap() const = 0; + virtual const RCP > GetComm() const = 0; + virtual const RCP GetDomainMap() const = 0; + virtual const RCP GetImportMap() const = 0; - //! @name Query graph attributes. - //@{ + //! @name Query graph attributes. + //@{ - //! Return number of vertices owned by the calling node. - virtual size_t GetNodeNumVertices() const = 0; + //! Return number of vertices owned by the calling node. + virtual size_t GetNodeNumVertices() const = 0; - //! Return number of edges owned by the calling node. - virtual size_t GetNodeNumEdges() const = 0; + //! Return number of edges owned by the calling node. + virtual size_t GetNodeNumEdges() const = 0; - virtual void SetBoundaryNodeMap(const ArrayRCP & boundaryArray) = 0; + virtual void SetBoundaryNodeMap(const ArrayRCP &boundaryArray) = 0; - virtual size_t getLocalMaxNumRowEntries() const = 0; + virtual size_t getLocalMaxNumRowEntries() const = 0; - virtual const ArrayRCP GetBoundaryNodeMap() const = 0; + virtual const ArrayRCP GetBoundaryNodeMap() const = 0; - //FIXME is this necessary? - //! Return number of global edges in the graph. - virtual Xpetra::global_size_t GetGlobalNumEdges() const = 0; + // FIXME is this necessary? + //! Return number of global edges in the graph. + virtual Xpetra::global_size_t GetGlobalNumEdges() const = 0; - //! Return the list of vertices adjacent to the vertex 'v'. - virtual Teuchos::ArrayView getNeighborVertices(LocalOrdinal v) const = 0; + //! Return the list of vertices adjacent to the vertex 'v'. + virtual Teuchos::ArrayView getNeighborVertices(LocalOrdinal v) const = 0; - //! Return true if vertex with local id 'v' is on current process. - virtual bool isLocalNeighborVertex(LocalOrdinal v) const = 0; - //@} + //! Return true if vertex with local id 'v' is on current process. 
+ virtual bool isLocalNeighborVertex(LocalOrdinal v) const = 0; + //@} - //! @name Print graph. - //@{ - /// Return a simple one-line description of the Graph. - virtual std::string description() const = 0; + //! @name Print graph. + //@{ + /// Return a simple one-line description of the Graph. + virtual std::string description() const = 0; - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - virtual void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const = 0; - //@} + //! Print the Graph with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + virtual void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const = 0; + //@} +}; - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_GRAPHBASE_SHORT -#endif // MUELU_GRAPHBASE_HPP +#endif // MUELU_GRAPHBASE_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp index 5ecc7c87e000..2136b9320d7f 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_GRAPH_DECL_HPP #define MUELU_GRAPH_DECL_HPP -#include // global_size_t -#include // inline functions requires class declaration +#include // global_size_t +#include // inline functions requires class declaration #include #include "MueLu_ConfigDefs.hpp" @@ -64,78 +64,76 @@ namespace MueLu { This class holds an underlying Xpetra_CrsGraph. This class can be considered a facade, as MueLu needs only limited functionality for aggregation. 
*/ - template - class Graph - : public MueLu::GraphBase { //FIXME shortnames isn't working +template +class Graph + : public MueLu::GraphBase { // FIXME shortnames isn't working #undef MUELU_GRAPH_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ + Graph(const RCP& graph, const std::string& /* objectLabel */ = ""); - //! @name Constructors/Destructors. - //@{ - Graph(const RCP & graph, const std::string & /* objectLabel */=""); + virtual ~Graph() {} + //@} - virtual ~Graph() {} - //@} + size_t GetNodeNumVertices() const { return graph_->getLocalNumRows(); } + size_t GetNodeNumEdges() const { return graph_->getLocalNumEntries(); } - size_t GetNodeNumVertices() const { return graph_->getLocalNumRows(); } - size_t GetNodeNumEdges() const { return graph_->getLocalNumEntries(); } + Xpetra::global_size_t GetGlobalNumEdges() const { return graph_->getGlobalNumEntries(); } - Xpetra::global_size_t GetGlobalNumEdges() const { return graph_->getGlobalNumEntries(); } + const RCP > GetComm() const { return graph_->getComm(); } + const RCP GetDomainMap() const { return graph_->getDomainMap(); } + //! Returns overlapping import map (nodes). + const RCP GetImportMap() const { return graph_->getColMap(); } - const RCP > GetComm() const { return graph_->getComm(); } - const RCP GetDomainMap() const { return graph_->getDomainMap(); } - //! Returns overlapping import map (nodes). - const RCP GetImportMap() const { return graph_->getColMap(); } + const RCP GetGraph() const { return graph_; } - const RCP GetGraph() const {return graph_;} + //! Set map with local ids of boundary nodes. + void SetBoundaryNodeMap(const ArrayRCP& localDirichletNodes) { localDirichletNodes_ = localDirichletNodes; } - //! Set map with local ids of boundary nodes. - void SetBoundaryNodeMap(const ArrayRCP& localDirichletNodes) { localDirichletNodes_ = localDirichletNodes; } + //! Returns map with local ids of boundary nodes. 
+ const ArrayRCP GetBoundaryNodeMap() const { return localDirichletNodes_; } - //! Returns map with local ids of boundary nodes. - const ArrayRCP GetBoundaryNodeMap() const { return localDirichletNodes_; } + //! Returns the maximum number of entries across all rows/columns on this node + size_t getLocalMaxNumRowEntries() const { return graph_->getLocalMaxNumRowEntries(); } - //! Returns the maximum number of entries across all rows/columns on this node - size_t getLocalMaxNumRowEntries () const { return graph_->getLocalMaxNumRowEntries(); } + //! Return the list of vertices adjacent to the vertex 'v'. + ArrayView getNeighborVertices(LO i) const { + ArrayView rowView; + graph_->getLocalRowView(i, rowView); + return rowView; + } - //! Return the list of vertices adjacent to the vertex 'v'. - ArrayView getNeighborVertices(LO i) const { - ArrayView rowView; - graph_->getLocalRowView(i, rowView); - return rowView; - } - - //! Return true if vertex with local id 'v' is on current process. - bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } + //! Return true if vertex with local id 'v' is on current process. + bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } #ifdef MUELU_UNUSED - size_t GetNodeNumGhost() const; + size_t GetNodeNumGhost() const; #endif - /// Return a simple one-line description of the Graph. - std::string description() const { return "MueLu.description()"; } - - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + /// Return a simple one-line description of the Graph. + std::string description() const { return "MueLu.description()"; } - private: + //! 
Print the Graph with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; - RCP graph_; + private: + RCP graph_; - //! Vector of Dirichlet boundary node IDs on current process. - ArrayRCP localDirichletNodes_; + //! Vector of Dirichlet boundary node IDs on current process. + ArrayRCP localDirichletNodes_; - // local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - }; + // local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_GRAPH_SHORT -#endif // MUELU_GRAPH_DECL_HPP +#endif // MUELU_GRAPH_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp index 7d9bf76e1cf3..d2c7240beafb 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp @@ -54,51 +54,52 @@ namespace MueLu { - template - Graph::Graph(const RCP & graph, const std::string & /* objectLabel */) : graph_(graph) { - minLocalIndex_ = graph_->getDomainMap()->getMinLocalIndex(); - maxLocalIndex_ = graph_->getDomainMap()->getMaxLocalIndex(); - } +template +Graph::Graph(const RCP &graph, const std::string & /* objectLabel */) + : graph_(graph) { + minLocalIndex_ = graph_->getDomainMap()->getMinLocalIndex(); + maxLocalIndex_ = graph_->getDomainMap()->getMaxLocalIndex(); +} #ifdef MUELU_UNUSED - template - size_t Graph::GetNodeNumGhost() const { - /* - Ray's comments about nGhost: - Graph->NGhost == graph_->RowMatrixColMap()->NumMyElements() - graph_->MatrixDomainMap()->NumMyElements() - is basically right. But we've had some issues about how epetra handles empty columns. 
- Probably worth discussing this with Jonathan and Chris to see if this is ALWAYS right. - */ - size_t nGhost = graph_->getColMap()->getLocalNumElements() - graph_->getDomainMap()->getLocalNumElements(); - if (nGhost < 0) nGhost = 0; // FIXME: size_t is unsigned. +template +size_t Graph::GetNodeNumGhost() const { + /* + Ray's comments about nGhost: + Graph->NGhost == graph_->RowMatrixColMap()->NumMyElements() - graph_->MatrixDomainMap()->NumMyElements() + is basically right. But we've had some issues about how epetra handles empty columns. + Probably worth discussing this with Jonathan and Chris to see if this is ALWAYS right. + */ + size_t nGhost = graph_->getColMap()->getLocalNumElements() - graph_->getDomainMap()->getLocalNumElements(); + if (nGhost < 0) nGhost = 0; // FIXME: size_t is unsigned. - return nGhost; - } + return nGhost; +} #endif - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { - template - void Graph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Parameters0) { - //out0 << "Prec. type: " << type_ << std::endl; - } +//! Print the object with some verbosity level to an FancyOStream object. +// using MueLu::Describable::describe; // overloading, not hiding +// void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { +template +void Graph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + MUELU_DESCRIBE; - if (verbLevel & Parameters1) { - //out0 << "Linear Algebra: " << toString(lib_) << std::endl; - //out0 << "PrecType: " << type_ << std::endl; - //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } - //out0 << "Overlap: " << overlap_ << std::endl; - } + if (verbLevel & Parameters0) { + // out0 << "Prec. 
type: " << type_ << std::endl; + } - if (verbLevel & Debug) { - graph_->describe(out0, Teuchos::VERB_EXTREME); - } + if (verbLevel & Parameters1) { + // out0 << "Linear Algebra: " << toString(lib_) << std::endl; + // out0 << "PrecType: " << type_ << std::endl; + // out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } + // out0 << "Overlap: " << overlap_ << std::endl; } + if (verbLevel & Debug) { + graph_->describe(out0, Teuchos::VERB_EXTREME); + } } -#endif // MUELU_GRAPH_DEF_HPP +} // namespace MueLu + +#endif // MUELU_GRAPH_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp index f6b3c8338cbe..2292b98a2795 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_LWGRAPH_DECL_HPP #define MUELU_LWGRAPH_DECL_HPP -#include // global_size_t -#include // inline functions requires class declaration +#include // global_size_t +#include // inline functions requires class declaration #include #include "MueLu_ConfigDefs.hpp" @@ -66,117 +66,117 @@ namespace MueLu { fillComplete. TODO handle systems */ - template - class LWGraph : public MueLu::GraphBase { +template +class LWGraph : public MueLu::GraphBase { #undef MUELU_LWGRAPH_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! LWGraph constructor - // - // @param[in] rowPtrs: Array containing row offsets (CSR format) - // @param[in] colPtrs: Array containing local column indices (CSR format) - // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] importMap: overlapping map for graph. 
Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] objectLabel: label string - LWGraph(const ArrayRCP& rowPtrs, const ArrayRCP& colPtrs, - const RCP& domainMap, const RCP& importMap, const std::string& objectLabel = "") - : rows_(rowPtrs), columns_(colPtrs), domainMap_(domainMap), importMap_(importMap), domainMapRef_(*domainMap), objectLabel_(objectLabel) - { - minLocalIndex_ = domainMapRef_.getMinLocalIndex(); - maxLocalIndex_ = domainMapRef_.getMaxLocalIndex(); - - maxNumRowEntries_ = 0; - - LO nRows = as(rowPtrs.size()-1); - for (LO i = 0; i < nRows; i++) - maxNumRowEntries_ = std::max(maxNumRowEntries_, as(rowPtrs[i+1] - rowPtrs[i])); - } - - virtual ~LWGraph() {} - //@} - - size_t GetNodeNumVertices() const { return rows_.size()-1; } - size_t GetNodeNumEdges() const { return rows_[rows_.size()-1]; } - - // TODO: do we really need this function - // It is being called from CoupledAggregation, but do we need it there? - Xpetra::global_size_t GetGlobalNumEdges() const { - Xpetra::global_size_t in = GetNodeNumEdges(), out; - Teuchos::reduceAll(*domainMap_->getComm(), Teuchos::REDUCE_SUM, in, Teuchos::outArg(out)); - return out; - } - - const RCP > GetComm() const { return domainMap_->getComm(); } - const RCP GetDomainMap() const { return domainMap_; } - //! Returns overlapping import map (nodes). - const RCP GetImportMap() const { return importMap_; } - - void SetBoundaryNodeMap(RCP const &/* map */) { throw Exceptions::NotImplemented("LWGraph: Boundary node map not implemented."); } - - //! Return the list of vertices adjacent to the vertex 'v'. - Teuchos::ArrayView getNeighborVertices(LO i) const { return columns_.view(rows_[i], rows_[i+1]-rows_[i]); } - - //! Return true if vertex with local id 'v' is on current process. - bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } - - //! Set boolean array indicating which rows correspond to Dirichlet boundaries. 
- void SetBoundaryNodeMap(const ArrayRCP& bndry) { dirichletBoundaries_ = bndry; } - - //! Returns the maximum number of entries across all rows/columns on this node - size_t getLocalMaxNumRowEntries () const { return maxNumRowEntries_; } - - //! Returns map with global ids of boundary nodes. - const ArrayRCP GetBoundaryNodeMap() const { return dirichletBoundaries_; } - - - /// Return a simple one-line description of the Graph. - std::string description() const { return "MueLu.description()"; } //FIXME use object's label - - //! Return the row pointers of the local graph - const ArrayRCP getRowPtrs() const { - return rows_; - } - - //! Return the list entries in the local graph - const ArrayRCP getEntries() const { - return columns_; - } - - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - - RCP GetCrsGraph() const; - - private: - - //! Indices into columns_ array. Part of local graph information. - const ArrayRCP rows_; - //! Columns corresponding to connections. Part of local graph information. - const ArrayRCP columns_; - //! Graph maps - const RCP domainMap_, importMap_; - const Map& domainMapRef_; - //! Name of this graph. - const std::string objectLabel_; - //! Boolean array marking Dirichlet rows. - ArrayRCP dirichletBoundaries_; - - // local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - size_t maxNumRowEntries_; - }; - -} // namespace MueLu + public: + //! @name Constructors/Destructors. + //@{ + + //! LWGraph constructor + // + // @param[in] rowPtrs: Array containing row offsets (CSR format) + // @param[in] colPtrs: Array containing local column indices (CSR format) + // @param[in] domainMap: non-overlapping (domain) map for graph. 
Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] objectLabel: label string + LWGraph(const ArrayRCP& rowPtrs, const ArrayRCP& colPtrs, + const RCP& domainMap, const RCP& importMap, const std::string& objectLabel = "") + : rows_(rowPtrs) + , columns_(colPtrs) + , domainMap_(domainMap) + , importMap_(importMap) + , domainMapRef_(*domainMap) + , objectLabel_(objectLabel) { + minLocalIndex_ = domainMapRef_.getMinLocalIndex(); + maxLocalIndex_ = domainMapRef_.getMaxLocalIndex(); + + maxNumRowEntries_ = 0; + + LO nRows = as(rowPtrs.size() - 1); + for (LO i = 0; i < nRows; i++) + maxNumRowEntries_ = std::max(maxNumRowEntries_, as(rowPtrs[i + 1] - rowPtrs[i])); + } + + virtual ~LWGraph() {} + //@} + + size_t GetNodeNumVertices() const { return rows_.size() - 1; } + size_t GetNodeNumEdges() const { return rows_[rows_.size() - 1]; } + + // TODO: do we really need this function + // It is being called from CoupledAggregation, but do we need it there? + Xpetra::global_size_t GetGlobalNumEdges() const { + Xpetra::global_size_t in = GetNodeNumEdges(), out; + Teuchos::reduceAll(*domainMap_->getComm(), Teuchos::REDUCE_SUM, in, Teuchos::outArg(out)); + return out; + } + + const RCP > GetComm() const { return domainMap_->getComm(); } + const RCP GetDomainMap() const { return domainMap_; } + //! Returns overlapping import map (nodes). + const RCP GetImportMap() const { return importMap_; } + + void SetBoundaryNodeMap(RCP const& /* map */) { throw Exceptions::NotImplemented("LWGraph: Boundary node map not implemented."); } + + //! Return the list of vertices adjacent to the vertex 'v'. + Teuchos::ArrayView getNeighborVertices(LO i) const { return columns_.view(rows_[i], rows_[i + 1] - rows_[i]); } + + //! Return true if vertex with local id 'v' is on current process. 
+ bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } + + //! Set boolean array indicating which rows correspond to Dirichlet boundaries. + void SetBoundaryNodeMap(const ArrayRCP& bndry) { dirichletBoundaries_ = bndry; } + + //! Returns the maximum number of entries across all rows/columns on this node + size_t getLocalMaxNumRowEntries() const { return maxNumRowEntries_; } + + //! Returns map with global ids of boundary nodes. + const ArrayRCP GetBoundaryNodeMap() const { return dirichletBoundaries_; } + + /// Return a simple one-line description of the Graph. + std::string description() const { return "MueLu.description()"; } // FIXME use object's label + + //! Return the row pointers of the local graph + const ArrayRCP getRowPtrs() const { + return rows_; + } + + //! Return the list entries in the local graph + const ArrayRCP getEntries() const { + return columns_; + } + + //! Print the Graph with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + + RCP GetCrsGraph() const; + + private: + //! Indices into columns_ array. Part of local graph information. + const ArrayRCP rows_; + //! Columns corresponding to connections. Part of local graph information. + const ArrayRCP columns_; + //! Graph maps + const RCP domainMap_, importMap_; + const Map& domainMapRef_; + //! Name of this graph. + const std::string objectLabel_; + //! Boolean array marking Dirichlet rows. 
+ ArrayRCP dirichletBoundaries_; + + // local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; + size_t maxNumRowEntries_; +}; + +} // namespace MueLu #define MUELU_LWGRAPH_SHORT -#endif // MUELU_LWGRAPH_DECL_HPP +#endif // MUELU_LWGRAPH_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp index 230a6c908587..1f0e907adaa9 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp @@ -52,46 +52,45 @@ namespace MueLu { - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { - template - void LWGraph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - // MUELU_DESCRIBE; +//! Print the object with some verbosity level to an FancyOStream object. +// using MueLu::Describable::describe; // overloading, not hiding +// void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { +template +void LWGraph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + // MUELU_DESCRIBE; - if (verbLevel & Parameters0) { - //out0 << "Prec. type: " << type_ << std::endl; - } - - if (verbLevel & Parameters1) { - //out0 << "Linear Algebra: " << toString(lib_) << std::endl; - //out0 << "PrecType: " << type_ << std::endl; - //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } - //out0 << "Overlap: " << overlap_ << std::endl; - } - - if (verbLevel & Debug) { - RCP col_map = importMap_.is_null() ? domainMap_ : importMap_; + if (verbLevel & Parameters0) { + // out0 << "Prec. 
type: " << type_ << std::endl; + } - for (LO i = 0; i < rows_.size()-1; i++) { - for (LO j = rows_[i]; j < rows_[i+1]; j++) - out<< domainMap_->getGlobalElement(i) << " " << col_map->getGlobalElement(columns_[j])< col_map = importMap_.is_null() ? domainMap_ : importMap_; - template - RCP > LWGraph::GetCrsGraph() const { - ArrayRCP rowPtrs; - rowPtrs.resize(rows_.size()); - for (size_t i=0; i(rows_.size()); i++) - rowPtrs[i] = rows_[i]; - auto graph = Xpetra::CrsGraphFactory::Build(GetDomainMap(), GetImportMap(), rowPtrs, Teuchos::arcp_const_cast(getEntries())); - graph->fillComplete(); - return graph; + for (LO i = 0; i < rows_.size() - 1; i++) { + for (LO j = rows_[i]; j < rows_[i + 1]; j++) + out << domainMap_->getGlobalElement(i) << " " << col_map->getGlobalElement(columns_[j]) << std::endl; } + } +} +template +RCP > LWGraph::GetCrsGraph() const { + ArrayRCP rowPtrs; + rowPtrs.resize(rows_.size()); + for (size_t i = 0; i < Teuchos::as(rows_.size()); i++) + rowPtrs[i] = rows_[i]; + auto graph = Xpetra::CrsGraphFactory::Build(GetDomainMap(), GetImportMap(), rowPtrs, Teuchos::arcp_const_cast(getEntries())); + graph->fillComplete(); + return graph; } -#endif // MUELU_LWGRAPH_DEF_HPP +} // namespace MueLu + +#endif // MUELU_LWGRAPH_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp index dd8a0231b15a..6debffc42f98 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp @@ -51,7 +51,7 @@ #include #include -#include // global_size_t +#include // global_size_t #include #include "MueLu_VerbosityLevel.hpp" @@ -62,110 +62,111 @@ namespace MueLu { - /*! - @class LWGraph_kokkos - @brief Lightweight MueLu representation of a compressed row storage graph - - This class is lightweight in the sense that it holds to local graph - information. 
These were built without using fillComplete. - */ - template - class LWGraph_kokkos; - - // Partial specialization for DeviceType - template - class LWGraph_kokkos> { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using memory_space = typename DeviceType::memory_space; - using device_type = Kokkos::Device; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using local_lw_graph_type = MueLu::LocalLWGraph_kokkos; - using size_type = size_t; - - using map_type = Xpetra::Map; - using local_graph_type = typename local_lw_graph_type::local_graph_type; - using boundary_nodes_type = typename local_lw_graph_type::boundary_nodes_type; - - private: - // For compatibility - typedef node_type Node; +/*! + @class LWGraph_kokkos + @brief Lightweight MueLu representation of a compressed row storage graph + + This class is lightweight in the sense that it holds to local graph + information. These were built without using fillComplete. + */ +template +class LWGraph_kokkos; + +// Partial specialization for DeviceType +template +class LWGraph_kokkos> { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using local_lw_graph_type = MueLu::LocalLWGraph_kokkos; + using size_type = size_t; + + using map_type = Xpetra::Map; + using local_graph_type = typename local_lw_graph_type::local_graph_type; + using boundary_nodes_type = typename local_lw_graph_type::boundary_nodes_type; + + private: + // For compatibility + typedef node_type Node; #undef MUELU_LWGRAPH_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! 
LWGraph constructor - // - // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data - // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] objectLabel: label string - LWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap, - const RCP& importMap, - const std::string& objectLabel = "") - : lclLWGraph_(graph, domainMap), domainMap_(domainMap), importMap_(importMap), objectLabel_(objectLabel) { } - - ~LWGraph_kokkos() = default; - //@} - - const RCP > GetComm() const { - return domainMap_->getComm(); - } - const RCP GetDomainMap() const { - return domainMap_; - } - //! Return overlapping import map (nodes). - const RCP GetImportMap() const { - return importMap_; - } - - //! Return number of graph vertices - KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { - return lclLWGraph_.GetNodeNumVertices(); - } - //! Return number of graph edges - KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { - return lclLWGraph_.GetNodeNumEdges(); - } - - //! Returns the maximum number of entries across all rows/columns on this node - KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries () const { - return lclLWGraph_.getLocalMaxNumRowEntries(); - } - - /// Return a simple one-line description of the Graph. - std::string description() const { - return "LWGraph (" + objectLabel_ + ")"; - } - - //! Print the Graph with some verbosity level to an FancyOStream object. - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - local_lw_graph_type& getLocalLWGraph() const { - return lclLWGraph_; - } - - private: - - //! Underlying graph (with label) - mutable local_lw_graph_type lclLWGraph_; - - //! 
Graph maps - const RCP domainMap_; - const RCP importMap_; - - //! Name of this graph. - const std::string objectLabel_; - }; - -} + public: + //! @name Constructors/Destructors. + //@{ + + //! LWGraph constructor + // + // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data + // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] objectLabel: label string + LWGraph_kokkos(const local_graph_type& graph, + const RCP& domainMap, + const RCP& importMap, + const std::string& objectLabel = "") + : lclLWGraph_(graph, domainMap) + , domainMap_(domainMap) + , importMap_(importMap) + , objectLabel_(objectLabel) {} + + ~LWGraph_kokkos() = default; + //@} + + const RCP> GetComm() const { + return domainMap_->getComm(); + } + const RCP GetDomainMap() const { + return domainMap_; + } + //! Return overlapping import map (nodes). + const RCP GetImportMap() const { + return importMap_; + } + + //! Return number of graph vertices + KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { + return lclLWGraph_.GetNodeNumVertices(); + } + //! Return number of graph edges + KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { + return lclLWGraph_.GetNodeNumEdges(); + } + + //! Returns the maximum number of entries across all rows/columns on this node + KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const { + return lclLWGraph_.getLocalMaxNumRowEntries(); + } + + /// Return a simple one-line description of the Graph. + std::string description() const { + return "LWGraph (" + objectLabel_ + ")"; + } + + //! Print the Graph with some verbosity level to an FancyOStream object. 
+ void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + + local_lw_graph_type& getLocalLWGraph() const { + return lclLWGraph_; + } + + private: + //! Underlying graph (with label) + mutable local_lw_graph_type lclLWGraph_; + + //! Graph maps + const RCP domainMap_; + const RCP importMap_; + + //! Name of this graph. + const std::string objectLabel_; +}; + +} // namespace MueLu #define MUELU_LWGRAPH_KOKKOS_SHORT -#endif // MUELU_LWGRAPH_KOKKOS_DECL_HPP +#endif // MUELU_LWGRAPH_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp index 4d164f1b8f50..03a55d4fa301 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp @@ -55,38 +55,37 @@ namespace MueLu { - template - void LWGraph_kokkos>:: - print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { +template +void LWGraph_kokkos>:: + print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + if (verbLevel & Debug) { + auto graph = lclLWGraph_.getGraph(); + RCP col_map = importMap_.is_null() ? domainMap_ : importMap_; + int mypid = col_map->getComm()->getRank(); - if (verbLevel & Debug) { - auto graph = lclLWGraph_.getGraph(); - RCP col_map = importMap_.is_null() ? 
domainMap_ : importMap_; - int mypid = col_map->getComm()->getRank(); - - { + { std::ostringstream ss; ss << "[pid " << mypid << "] num entries=" << graph.entries.size(); out << ss.str() << std::endl; - } + } - const size_t numRows = graph.numRows(); - auto rowPtrs = graph.row_map; - auto columns = graph.entries; - for (size_t i=0; i < numRows; ++i) { - std::ostringstream ss; - ss << "[pid " << mypid << "] row " << domainMap_->getGlobalElement(i) << ":"; - ss << " (numEntries=" << rowPtrs(i+1)-rowPtrs(i) << ")"; + const size_t numRows = graph.numRows(); + auto rowPtrs = graph.row_map; + auto columns = graph.entries; + for (size_t i = 0; i < numRows; ++i) { + std::ostringstream ss; + ss << "[pid " << mypid << "] row " << domainMap_->getGlobalElement(i) << ":"; + ss << " (numEntries=" << rowPtrs(i + 1) - rowPtrs(i) << ")"; - auto rowView = graph.rowConst(i); - for (LO j = 0; j < rowView.length; j++) { - ss << " " << col_map->getGlobalElement(rowView.colidx(j)); - } - out << ss.str() << std::endl; + auto rowView = graph.rowConst(i); + for (LO j = 0; j < rowView.length; j++) { + ss << " " << col_map->getGlobalElement(rowView.colidx(j)); } + out << ss.str() << std::endl; } } +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP +#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp index 04192eee8d72..b1c15756ccb3 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp @@ -43,51 +43,53 @@ // *********************************************************************** // // @HEADER -#include // for NULL +#include // for NULL #include "MueLu_LinkedList.hpp" namespace MueLu { - LinkedList::LinkedList() : nodeHead(NULL), nodeTail(NULL) { } +LinkedList::LinkedList() + : nodeHead(NULL) + , nodeTail(NULL) {} - LinkedList::~LinkedList() { - while (nodeHead != 
NULL) - DeleteHead(); - } +LinkedList::~LinkedList() { + while (nodeHead != NULL) + DeleteHead(); +} - bool LinkedList::IsEmpty() { - return nodeHead == NULL; - } +bool LinkedList::IsEmpty() { + return nodeHead == NULL; +} - void LinkedList::Add(int iNode) { - MueLu_Node *newNode = new MueLu_Node; - newNode->nodeId = iNode; - newNode->next = NULL; - if (nodeHead == NULL) { - nodeHead = newNode; - nodeTail = newNode; - } else { - nodeTail->next = newNode; - nodeTail = newNode; - } +void LinkedList::Add(int iNode) { + MueLu_Node *newNode = new MueLu_Node; + newNode->nodeId = iNode; + newNode->next = NULL; + if (nodeHead == NULL) { + nodeHead = newNode; + nodeTail = newNode; + } else { + nodeTail->next = newNode; + nodeTail = newNode; } +} - int LinkedList::Pop() { // get head and remove first node - if (IsEmpty()) return -1; - - int iNode = nodeHead->nodeId; - DeleteHead(); - return iNode; - } +int LinkedList::Pop() { // get head and remove first node + if (IsEmpty()) return -1; - void LinkedList::DeleteHead() { - if (IsEmpty()) return; + int iNode = nodeHead->nodeId; + DeleteHead(); + return iNode; +} - MueLu_Node *newNode = nodeHead; - nodeHead = newNode->next; - delete newNode; - } +void LinkedList::DeleteHead() { + if (IsEmpty()) return; + MueLu_Node *newNode = nodeHead; + nodeHead = newNode->next; + delete newNode; } -//TODO: nodeTail unused -> remove? +} // namespace MueLu + +// TODO: nodeTail unused -> remove? 
diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp index ba8d95e30751..8546ec3ccfec 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp @@ -52,33 +52,30 @@ namespace MueLu { - typedef struct MueLu_Node_Struct - { - int nodeId; - struct MueLu_Node_Struct *next; - } MueLu_Node; +typedef struct MueLu_Node_Struct { + int nodeId; + struct MueLu_Node_Struct *next; +} MueLu_Node; - class LinkedList { +class LinkedList { + public: + LinkedList(); - public: - LinkedList(); + ~LinkedList(); - ~LinkedList(); + bool IsEmpty(); - bool IsEmpty(); + void Add(int iNode); - void Add(int iNode); + int Pop(); - int Pop(); + private: + MueLu_Node *nodeHead; + MueLu_Node *nodeTail; - private: - MueLu_Node *nodeHead; - MueLu_Node *nodeTail; + void DeleteHead(); +}; - void DeleteHead(); +} // namespace MueLu - }; - -} - -#endif // MUELU_LINKEDLIST_HPP +#endif // MUELU_LINKEDLIST_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp index 294fe160e530..80f24eea4bbb 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp @@ -51,7 +51,7 @@ #include #include -#include // global_size_t +#include // global_size_t #include #include "MueLu_VerbosityLevel.hpp" @@ -61,125 +61,122 @@ namespace MueLu { - /*! - @class LocalLWGraph_kokkos - @brief Lightweight MueLu representation of a compressed row storage graph - - This class is lightweight in the sense that it holds to local graph - information. These were built without using fillComplete. 
- */ - template - class LocalLWGraph_kokkos; - - // Partial specialization for DeviceType - template - class LocalLWGraph_kokkos> { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using memory_space = typename DeviceType::memory_space; - using device_type = Kokkos::Device; - using range_type = Kokkos::RangePolicy; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using size_type = size_t; - - using local_graph_type = Kokkos::StaticCrsGraph; - using boundary_nodes_type = Kokkos::View; - using row_type = Kokkos::View; - using map_type = Xpetra::Map; - - private: - // For compatibility - typedef node_type Node; +/*! + @class LocalLWGraph_kokkos + @brief Lightweight MueLu representation of a compressed row storage graph + + This class is lightweight in the sense that it holds to local graph + information. These were built without using fillComplete. + */ +template +class LocalLWGraph_kokkos; + +// Partial specialization for DeviceType +template +class LocalLWGraph_kokkos> { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + using range_type = Kokkos::RangePolicy; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using size_type = size_t; + + using local_graph_type = Kokkos::StaticCrsGraph; + using boundary_nodes_type = Kokkos::View; + using row_type = Kokkos::View; + using map_type = Xpetra::Map; + + private: + // For compatibility + typedef node_type Node; #undef MUELU_LOCALLWGRAPH_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! 
LocalLWGraph constructor - // - // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data - LocalLWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap); - - ~LocalLWGraph_kokkos() = default; - //@} - - //! Return number of graph vertices - KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { - return graph_.numRows(); - } - //! Return number of graph edges - KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { - return graph_.row_map(GetNodeNumVertices()); - } - - //! Returns the maximum number of entries across all rows/columns on this node - KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries () const { - return maxNumRowEntries_; - } - - //! Return the row pointers of the local graph - KOKKOS_INLINE_FUNCTION typename local_graph_type::row_map_type getRowPtrs() const { - return graph_.row_map; - } - - //! Return the list entries in the local graph - KOKKOS_INLINE_FUNCTION typename local_graph_type::entries_type getEntries() const { - return graph_.entries; - } - - //! Return the list of vertices adjacent to the vertex 'v'. - // Unfortunately, C++11 does not support the following: - // auto getNeighborVertices(LO i) const -> decltype(rowView) - // auto return with decltype was only introduced in C++14 - KOKKOS_INLINE_FUNCTION - Kokkos::GraphRowViewConst getNeighborVertices(LO i) const { - auto rowView = graph_.rowConst(i); - - return rowView; - } - - //! Return true if vertex with local id 'v' is on current process. - KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const { - return i >= minLocalIndex_ && i <= maxLocalIndex_; - } - - //! Set boolean array indicating which rows correspond to Dirichlet boundaries. - KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry) { - dirichletBoundaries_ = bndry; - } - - //! Returns map with global ids of boundary nodes. 
- KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const { - return dirichletBoundaries_; - } - - const local_graph_type& getGraph() const { - return graph_; - } - - private: - - //! Underlying graph (with label) - const local_graph_type graph_; - - //! Boolean array marking Dirichlet rows. - boundary_nodes_type dirichletBoundaries_; - - //! Local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - size_type maxNumRowEntries_; - - }; - -} + public: + //! @name Constructors/Destructors. + //@{ + + //! LocalLWGraph constructor + // + // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data + LocalLWGraph_kokkos(const local_graph_type& graph, + const RCP& domainMap); + + ~LocalLWGraph_kokkos() = default; + //@} + + //! Return number of graph vertices + KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { + return graph_.numRows(); + } + //! Return number of graph edges + KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { + return graph_.row_map(GetNodeNumVertices()); + } + + //! Returns the maximum number of entries across all rows/columns on this node + KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const { + return maxNumRowEntries_; + } + + //! Return the row pointers of the local graph + KOKKOS_INLINE_FUNCTION typename local_graph_type::row_map_type getRowPtrs() const { + return graph_.row_map; + } + + //! Return the list entries in the local graph + KOKKOS_INLINE_FUNCTION typename local_graph_type::entries_type getEntries() const { + return graph_.entries; + } + + //! Return the list of vertices adjacent to the vertex 'v'. + // Unfortunately, C++11 does not support the following: + // auto getNeighborVertices(LO i) const -> decltype(rowView) + // auto return with decltype was only introduced in C++14 + KOKKOS_INLINE_FUNCTION + Kokkos::GraphRowViewConst getNeighborVertices(LO i) const { + auto rowView = graph_.rowConst(i); + + return rowView; + } + + //! 
Return true if vertex with local id 'v' is on current process. + KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const { + return i >= minLocalIndex_ && i <= maxLocalIndex_; + } + + //! Set boolean array indicating which rows correspond to Dirichlet boundaries. + KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry) { + dirichletBoundaries_ = bndry; + } + + //! Returns map with global ids of boundary nodes. + KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const { + return dirichletBoundaries_; + } + + const local_graph_type& getGraph() const { + return graph_; + } + + private: + //! Underlying graph (with label) + const local_graph_type graph_; + + //! Boolean array marking Dirichlet rows. + boundary_nodes_type dirichletBoundaries_; + + //! Local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; + size_type maxNumRowEntries_; +}; + +} // namespace MueLu #define MUELU_LOCALLWGRAPH_KOKKOS_SHORT -#endif // MUELU_LOCALLWGRAPH_KOKKOS_DECL_HPP +#endif // MUELU_LOCALLWGRAPH_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp index 785706b1a002..2edf949396a4 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp @@ -55,49 +55,49 @@ namespace MueLu { - namespace { // anonymous +namespace { // anonymous - template - class MaxNumRowEntriesFunctor { - public: - MaxNumRowEntriesFunctor(RowType rowPointers) : rowPointers_(rowPointers) { } +template +class MaxNumRowEntriesFunctor { + public: + MaxNumRowEntriesFunctor(RowType rowPointers) + : rowPointers_(rowPointers) {} - KOKKOS_INLINE_FUNCTION - void operator()(const LocalOrdinal i, size_t& maxLength) const { - size_t d = rowPointers_(i+1) - rowPointers_(i); + KOKKOS_INLINE_FUNCTION + void operator()(const LocalOrdinal i, 
size_t& maxLength) const { + size_t d = rowPointers_(i + 1) - rowPointers_(i); - maxLength = (d > maxLength ? d : maxLength); - } + maxLength = (d > maxLength ? d : maxLength); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile size_t& dest, const volatile size_t& src) { + dest = (dest > src ? dest : src); + } - KOKKOS_INLINE_FUNCTION - void join(volatile size_t& dest, const volatile size_t& src) { - dest = (dest > src ? dest : src); - } + KOKKOS_INLINE_FUNCTION + void init(size_t& initValue) { + initValue = 0; + } - KOKKOS_INLINE_FUNCTION - void init(size_t& initValue) { - initValue = 0; - } + private: + RowType rowPointers_; +}; - private: - RowType rowPointers_; - }; +} // namespace - } +template +LocalLWGraph_kokkos>:: + LocalLWGraph_kokkos(const local_graph_type& graph, + const RCP& domainMap) + : graph_(graph) { + minLocalIndex_ = domainMap->getMinLocalIndex(); + maxLocalIndex_ = domainMap->getMaxLocalIndex(); - template - LocalLWGraph_kokkos>:: - LocalLWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap) - : graph_(graph) - { - minLocalIndex_ = domainMap->getMinLocalIndex(); - maxLocalIndex_ = domainMap->getMaxLocalIndex(); - - MaxNumRowEntriesFunctor maxNumRowEntriesFunctor(graph_.row_map); - Kokkos::parallel_reduce("MueLu:LocalLWGraph:LWGraph:maxnonzeros", range_type(0,graph_.numRows()), maxNumRowEntriesFunctor, maxNumRowEntries_); - } + MaxNumRowEntriesFunctor maxNumRowEntriesFunctor(graph_.row_map); + Kokkos::parallel_reduce("MueLu:LocalLWGraph:LWGraph:maxnonzeros", range_type(0, graph_.numRows()), maxNumRowEntriesFunctor, maxNumRowEntries_); +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP +#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp b/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp index 655e8663550d..e8398e08809b 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp +++ 
b/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp @@ -61,70 +61,63 @@ #include #include "MueLu_GraphBase.hpp" - - // Zoltab2 InputTraits for MueLu Graph objects namespace Zoltan2 { template -struct InputTraits > -{ +struct InputTraits > { typedef Zoltan2::default_scalar_t scalar_t; - typedef LocalOrdinal lno_t; + typedef LocalOrdinal lno_t; typedef GlobalOrdinal gno_t; typedef size_t offset_t; - typedef Zoltan2::default_part_t part_t; - typedef Node node_t; - static inline std::string name() {return "MueLu::Graph";} + typedef Zoltan2::default_part_t part_t; + typedef Node node_t; + static inline std::string name() { return "MueLu::Graph"; } - Z2_STATIC_ASSERT_TYPES // validate the types + Z2_STATIC_ASSERT_TYPES // validate the types }; -}//end namespace Zoltan2 - +} // end namespace Zoltan2 namespace MueLu { -template -class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { -public: - +template +class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { + public: #ifndef DOXYGEN_SHOULD_SKIP_THIS - typedef typename Zoltan2::InputTraits::scalar_t scalar_t; - typedef typename Zoltan2::InputTraits::offset_t offset_t; - typedef typename Zoltan2::InputTraits::lno_t lno_t; - typedef typename Zoltan2::InputTraits::gno_t gno_t; - typedef typename Zoltan2::InputTraits::part_t part_t; - typedef typename Zoltan2::InputTraits::node_t node_t; + typedef typename Zoltan2::InputTraits::scalar_t scalar_t; + typedef typename Zoltan2::InputTraits::offset_t offset_t; + typedef typename Zoltan2::InputTraits::lno_t lno_t; + typedef typename Zoltan2::InputTraits::gno_t gno_t; + typedef typename Zoltan2::InputTraits::part_t part_t; + typedef typename Zoltan2::InputTraits::node_t node_t; typedef User xgraph_t; typedef User user_t; typedef UserCoord userCoord_t; #endif //! 
MueLu::GraphBase Compatibility Layer - const Teuchos::RCP< const Teuchos::Comm< int > > getComm() const { return graph_->GetComm();} - const Teuchos::RCP< const Xpetra::Map > getRowMap() const { return graph_->GetDomainMap();} - const RCP< const Xpetra::Map > getColMap() const { + const Teuchos::RCP > getComm() const { return graph_->GetComm(); } + const Teuchos::RCP > getRowMap() const { return graph_->GetDomainMap(); } + const RCP > getColMap() const { // For some GraphBases' this is a ColMap, in others it is a seperate map that is // only non-null in parallel. - Teuchos::RCP > map = graph_->GetImportMap(); - if(map.is_null()) map = graph_->GetDomainMap(); + Teuchos::RCP > map = graph_->GetImportMap(); + if (map.is_null()) map = graph_->GetDomainMap(); return map; } - size_t getLocalNumEntries() const { return graph_->GetNodeNumEdges();} - size_t getLocalNumRows() const { return getRowMap()->getLocalNumElements();} - size_t getLocalNumCols() const { return getColMap()->getLocalNumElements();} + size_t getLocalNumEntries() const { return graph_->GetNodeNumEdges(); } + size_t getLocalNumRows() const { return getRowMap()->getLocalNumElements(); } + size_t getLocalNumCols() const { return getColMap()->getLocalNumElements(); } - void getLocalRowView(lno_t LocalRow, Teuchos::ArrayView< const lno_t > &indices) const { - indices = graph_->getNeighborVertices(LocalRow); + void getLocalRowView(lno_t LocalRow, Teuchos::ArrayView &indices) const { + indices = graph_->getNeighborVertices(LocalRow); } - - /*! \brief Destructor */ - ~MueLuGraphBaseAdapter() { } + ~MueLuGraphBaseAdapter() {} /*! \brief Constructor for graph with no weights or coordinates. * \param ingraph the Epetra_CrsGraph, Tpetra::CrsGraph or Xpetra::CrsGraph @@ -135,8 +128,8 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { * one does because the user is obviously a Trilinos user. 
*/ - MueLuGraphBaseAdapter(const RCP &ingraph, - int nVtxWeights=0, int nEdgeWeights=0); + MueLuGraphBaseAdapter(const RCP &ingraph, + int nVtxWeights = 0, int nEdgeWeights = 0); /*! \brief Provide a pointer to weights for the primary entity type. * \param val A pointer to the weights for index \c idx. @@ -227,8 +220,7 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { // TODO: Need to add option for columns or nonzeros? size_t getLocalNumVertices() const { return getLocalNumRows(); } - void getVertexIDsView(const gno_t *&ids) const - { + void getVertexIDsView(const gno_t *&ids) const { ids = NULL; if (getLocalNumVertices()) ids = getRowMap()->getLocalElementList().getRawPtr(); @@ -236,67 +228,57 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { size_t getLocalNumEdges() const { return getLocalNumEntries(); } - void getEdgesView(const offset_t *&offsets, const gno_t *&adjIds) const - { + void getEdgesView(const offset_t *&offsets, const gno_t *&adjIds) const { offsets = offs_.getRawPtr(); - adjIds = (getLocalNumEdges() ? adjids_.getRawPtr() : NULL); + adjIds = (getLocalNumEdges() ? 
adjids_.getRawPtr() : NULL); } - int getNumWeightsPerVertex() const { return nWeightsPerVertex_;} + int getNumWeightsPerVertex() const { return nWeightsPerVertex_; } void getVertexWeightsView(const scalar_t *&weights, int &stride, - int idx) const - { - if(idx<0 || idx >= nWeightsPerVertex_) - { + int idx) const { + if (idx < 0 || idx >= nWeightsPerVertex_) { std::ostringstream emsg; emsg << __FILE__ << ":" << __LINE__ << " Invalid vertex weight index " << idx << std::endl; throw std::runtime_error(emsg.str()); } - size_t length; vertexWeights_[idx].getStridedList(length, weights, stride); } - bool useDegreeAsVertexWeight(int idx) const {return vertexDegreeWeight_[idx];} + bool useDegreeAsVertexWeight(int idx) const { return vertexDegreeWeight_[idx]; } - int getNumWeightsPerEdge() const { return nWeightsPerEdge_;} + int getNumWeightsPerEdge() const { return nWeightsPerEdge_; } - void getEdgeWeightsView(const scalar_t *&weights, int &stride, int idx) const - { - if(idx<0 || idx >= nWeightsPerEdge_) - { + void getEdgeWeightsView(const scalar_t *&weights, int &stride, int idx) const { + if (idx < 0 || idx >= nWeightsPerEdge_) { std::ostringstream emsg; emsg << __FILE__ << ":" << __LINE__ << " Invalid edge weight index " << idx << std::endl; throw std::runtime_error(emsg.str()); } - size_t length; edgeWeights_[idx].getStridedList(length, weights, stride); } - template void applyPartitioningSolution(const User &in, User *&out, const Zoltan2::PartitioningSolution &solution) const { - TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument,"applyPartitionlingSolution not implemeneted"); -} + TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument, "applyPartitionlingSolution not implemeneted"); + } template void applyPartitioningSolution(const User &in, RCP &out, const Zoltan2::PartitioningSolution &solution) const { - TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument,"applyPartitionlingSolution not implemeneted"); + TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument, 
"applyPartitionlingSolution not implemeneted"); } - -private: - - RCP ingraph_; - RCP graph_; + private: + RCP ingraph_; + RCP graph_; RCP > comm_; ArrayRCP offs_; @@ -311,67 +293,69 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { int coordinateDim_; ArrayRCP > coords_; - }; - ///////////////////////////////////////////////////////////////// // Definitions ///////////////////////////////////////////////////////////////// template - MueLuGraphBaseAdapter::MueLuGraphBaseAdapter( - const RCP &ingraph, int nVtxWgts, int nEdgeWgts): - ingraph_(ingraph), graph_(), comm_() , offs_(), adjids_(), - nWeightsPerVertex_(nVtxWgts), vertexWeights_(), vertexDegreeWeight_(), - nWeightsPerEdge_(nEdgeWgts), edgeWeights_(), - coordinateDim_(0), coords_() -{ - typedef Zoltan2::StridedData input_t; +MueLuGraphBaseAdapter::MueLuGraphBaseAdapter( + const RCP &ingraph, int nVtxWgts, int nEdgeWgts) + : ingraph_(ingraph) + , graph_() + , comm_() + , offs_() + , adjids_() + , nWeightsPerVertex_(nVtxWgts) + , vertexWeights_() + , vertexDegreeWeight_() + , nWeightsPerEdge_(nEdgeWgts) + , edgeWeights_() + , coordinateDim_(0) + , coords_() { + typedef Zoltan2::StridedData input_t; graph_ = ingraph; - comm_ = getRowMap()->getComm(); - size_t nvtx = getLocalNumRows(); + comm_ = getRowMap()->getComm(); + size_t nvtx = getLocalNumRows(); size_t nedges = getLocalNumEntries(); // Unfortunately we have to copy the offsets and edge Ids // because edge Ids are not usually stored in vertex id order. 
size_t n = nvtx + 1; offs_.resize(n); - offset_t* offs = const_cast(offs_.getRawPtr()); - gno_t* adjids=0; - if(nedges > 0) { + offset_t *offs = const_cast(offs_.getRawPtr()); + gno_t *adjids = 0; + if (nedges > 0) { adjids_.resize(nedges); - adjids = const_cast(adjids_.getRawPtr()); + adjids = const_cast(adjids_.getRawPtr()); } offs[0] = 0; - for (size_t v=0; v < nvtx; v++){ + for (size_t v = 0; v < nvtx; v++) { ArrayView nbors; getLocalRowView(v, nbors); - offs[v+1] = offs[v] + nbors.size(); - for (offset_t e=offs[v], i=0; e < offs[v+1]; e++) { + offs[v + 1] = offs[v] + nbors.size(); + for (offset_t e = offs[v], i = 0; e < offs[v + 1]; e++) { adjids[e] = getColMap()->getGlobalElement(nbors[i++]); } } if (nWeightsPerVertex_ > 0) { vertexWeights_ = - arcp(new input_t[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); + arcp(new input_t[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); vertexDegreeWeight_ = - arcp(new bool[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); - for (int i=0; i < nWeightsPerVertex_; i++) + arcp(new bool[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); + for (int i = 0; i < nWeightsPerVertex_; i++) vertexDegreeWeight_[i] = false; } - - } //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setWeights( - const scalar_t *weightVal, int stride, int idx) -{ +void MueLuGraphBaseAdapter::setWeights( + const scalar_t *weightVal, int stride, int idx) { if (this->getPrimaryEntityType() == Zoltan2::GRAPH_VERTEX) setVertexWeights(weightVal, stride, idx); else @@ -380,29 +364,26 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setVertexWeights( - const scalar_t *weightVal, int stride, int idx) -{ - typedef Zoltan2::StridedData input_t; +void MueLuGraphBaseAdapter::setVertexWeights( + const scalar_t *weightVal, int stride, int idx) { + typedef Zoltan2::StridedData input_t; - if(idx<0 || idx >= 
nWeightsPerVertex_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); + if (idx < 0 || idx >= nWeightsPerVertex_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ + << " Invalid vertex weight index " << idx << std::endl; + throw std::runtime_error(emsg.str()); } size_t nvtx = getLocalNumVertices(); - ArrayRCP weightV(weightVal, 0, nvtx*stride, false); + ArrayRCP weightV(weightVal, 0, nvtx * stride, false); vertexWeights_[idx] = input_t(weightV, stride); } //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setWeightIsDegree( - int idx) -{ +void MueLuGraphBaseAdapter::setWeightIsDegree( + int idx) { if (this->getPrimaryEntityType() == Zoltan2::GRAPH_VERTEX) setVertexWeightIsDegree(idx); else { @@ -416,15 +397,13 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setVertexWeightIsDegree( - int idx) -{ - if(idx<0 || idx >= nWeightsPerVertex_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); +void MueLuGraphBaseAdapter::setVertexWeightIsDegree( + int idx) { + if (idx < 0 || idx >= nWeightsPerVertex_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ + << " Invalid vertex weight index " << idx << std::endl; + throw std::runtime_error(emsg.str()); } vertexDegreeWeight_[idx] = true; @@ -432,28 +411,24 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setEdgeWeights( - const scalar_t *weightVal, int stride, int idx) -{ - typedef Zoltan2::StridedData input_t; +void MueLuGraphBaseAdapter::setEdgeWeights( + const scalar_t *weightVal, int stride, int idx) { + typedef Zoltan2::StridedData input_t; 
- if(idx<0 || idx >= nWeightsPerEdge_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid edge weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); + if (idx < 0 || idx >= nWeightsPerEdge_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ + << " Invalid edge weight index " << idx << std::endl; + throw std::runtime_error(emsg.str()); } size_t nedges = getLocalNumEdges(); - ArrayRCP weightV(weightVal, 0, nedges*stride, false); + ArrayRCP weightV(weightVal, 0, nedges * stride, false); edgeWeights_[idx] = input_t(weightV, stride); } +} // namespace MueLu -} //namespace MueLu - +#endif // HAVE_MUELU_ZOLTAN2 -#endif// HAVE_MUELU_ZOLTAN2 - #endif diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp index 879332ca0d44..4e2c5302bbd1 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ - #include #include "MueLu_ConfigDefs.hpp" @@ -129,59 +128,58 @@ namespace MueLu { | Aggregates | HybridAggregationFactory | Container class with aggregation information. See also Aggregates. */ - template - class HybridAggregationFactory : public SingleLevelFactoryBase { +template +class HybridAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_HYBRIDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - HybridAggregationFactory(); + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~HybridAggregationFactory() { } + //! Constructor. + HybridAggregationFactory(); - RCP GetValidParameterList() const; + //! Destructor. 
+ virtual ~HybridAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level &currentLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level &currentLevel) const; + //! @name Build methods. + //@{ - /*! @brief Specifically build aggregates along interfaces */ - void BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, - std::vector& aggStat, LO& numNonAggregatedNodes, - Array coarseRate) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + /*! @brief Specifically build aggregates along interfaces */ + void BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, + std::vector& aggStat, LO& numNonAggregatedNodes, + Array coarseRate) const; - private: + //@} - //! aggregation algorithms - // will be filled in Build routine - mutable std::vector > > algos_; + private: + //! aggregation algorithms + // will be filled in Build routine + mutable std::vector > > algos_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! 
if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class HybridAggregationFactory +}; // class HybridAggregationFactory -} +} // namespace MueLu #define MUELU_HYBRIDAGGREGATIONFACTORY_SHORT #endif /* MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp index d0ac7bcc7496..36ba2ec572f8 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp @@ -77,265 +77,258 @@ #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" - namespace MueLu { - template - HybridAggregationFactory:: - HybridAggregationFactory() : bDefinitionPhase_(true) - { } +template +HybridAggregationFactory:: + HybridAggregationFactory() + : bDefinitionPhase_(true) {} - template - RCP HybridAggregationFactory:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP HybridAggregationFactory:: + GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - // From UncoupledAggregationFactory - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - 
SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: phase2a agg factor"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - - // From StructuredAggregationFactory - SET_VALID_ENTRY("aggregation: coarsening rate"); - SET_VALID_ENTRY("aggregation: coarsening order"); - SET_VALID_ENTRY("aggregation: number of spatial dimensions"); - - // From HybridAggregationFactory - SET_VALID_ENTRY("aggregation: use interface aggregation"); -#undef SET_VALID_ENTRY - - /* From UncoupledAggregation */ - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set ("OnePt aggregate map name", "", - "Name of input map for single node aggregates. (default='')"); - validParamList->set ("OnePt aggregate map factory", "", - "Generating factory of (DOF) map for single node aggregates."); - - // InterfaceAggregation parameters - validParamList->set ("Interface aggregate map name", "", - "Name of input map for interface aggregates. 
(default='')"); - validParamList->set ("Interface aggregate map factory", "", - "Generating factory of (DOF) map for interface aggregates."); - validParamList->set > ("interfacesDimensions", Teuchos::null, - "Describes the dimensions of all the interfaces on this rank."); - validParamList->set > ("nodeOnInterface", Teuchos::null, - "List the LIDs of the nodes on any interface."); - - /* From StructuredAggregation */ - // general variables needed in AggregationFactory - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); - - - // Hybrid Aggregation Params - validParamList->set > ("aggregationRegionType", Teuchos::null, - "Type of aggregation to use on the region (\"structured\" or \"uncoupled\")"); - - return validParamList; + // From UncoupledAggregationFactory + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering").setValidator(rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: phase2a agg factor"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + + // From StructuredAggregationFactory + SET_VALID_ENTRY("aggregation: 
coarsening rate"); + SET_VALID_ENTRY("aggregation: coarsening order"); + SET_VALID_ENTRY("aggregation: number of spatial dimensions"); + + // From HybridAggregationFactory + SET_VALID_ENTRY("aggregation: use interface aggregation"); +#undef SET_VALID_ENTRY + + /* From UncoupledAggregation */ + // general variables needed in AggregationFactory + validParamList->set >("Graph", null, "Generating factory of the graph"); + validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set("OnePt aggregate map name", "", + "Name of input map for single node aggregates. (default='')"); + validParamList->set("OnePt aggregate map factory", "", + "Generating factory of (DOF) map for single node aggregates."); + + // InterfaceAggregation parameters + validParamList->set("Interface aggregate map name", "", + "Name of input map for interface aggregates. (default='')"); + validParamList->set("Interface aggregate map factory", "", + "Generating factory of (DOF) map for interface aggregates."); + validParamList->set >("interfacesDimensions", Teuchos::null, + "Describes the dimensions of all the interfaces on this rank."); + validParamList->set >("nodeOnInterface", Teuchos::null, + "List the LIDs of the nodes on any interface."); + + /* From StructuredAggregation */ + // general variables needed in AggregationFactory + validParamList->set >("numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); + + // Hybrid Aggregation Params + validParamList->set >("aggregationRegionType", Teuchos::null, + "Type of aggregation to use on the region (\"structured\" or \"uncoupled\")"); + + return validParamList; +} + +template +void HybridAggregationFactory:: + 
DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + + ParameterList pL = GetParameterList(); + + /* StructuredAggregation */ + + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("aggregationRegionType", NoFactory::get())) { + currentLevel.DeclareInput("aggregationRegionType", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("aggregationRegionType", NoFactory::get()), + Exceptions::RuntimeError, + "Aggregation region type was not provided by the user!"); + } + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); + } + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); + } + } else { + Input(currentLevel, "aggregationRegionType"); + Input(currentLevel, "numDimensions"); + Input(currentLevel, "lNodesPerDim"); } - template - void HybridAggregationFactory:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - - ParameterList pL = GetParameterList(); - - + /* UncoupledAggregation */ + Input(currentLevel, "DofsPerNode"); - /* StructuredAggregation */ - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("aggregationRegionType", NoFactory::get())) { - currentLevel.DeclareInput("aggregationRegionType", NoFactory::get(), this); - } else { - 
TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("aggregationRegionType",NoFactory::get()), - Exceptions::RuntimeError, - "Aggregation region type was not provided by the user!"); - } - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); + // request special data necessary for InterfaceAggregation + if (pL.get("aggregation: use interface aggregation") == true) { + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("interfacesDimensions", NoFactory::get())) { + currentLevel.DeclareInput("interfacesDimensions", NoFactory::get(), this); } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("interfacesDimensions", NoFactory::get()), Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); + "interfacesDimensions was not provided by the user on level0!"); } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); + if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { + currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); + "nodeOnInterface was not provided by the user on level0!"); } } else { - Input(currentLevel, "aggregationRegionType"); - Input(currentLevel, "numDimensions"); - Input(currentLevel, "lNodesPerDim"); - } - - - - /* UncoupledAggregation */ - Input(currentLevel, "DofsPerNode"); - - // request special data necessary for InterfaceAggregation - if (pL.get("aggregation: use interface aggregation") == true){ - if(currentLevel.GetLevelID() == 0) { - 
if(currentLevel.IsAvailable("interfacesDimensions", NoFactory::get())) { - currentLevel.DeclareInput("interfacesDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("interfacesDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "interfacesDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { - currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), - Exceptions::RuntimeError, - "nodeOnInterface was not provided by the user on level0!"); - } - } else { - Input(currentLevel, "interfacesDimensions"); - Input(currentLevel, "nodeOnInterface"); - } + Input(currentLevel, "interfacesDimensions"); + Input(currentLevel, "nodeOnInterface"); } + } - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } - } // DeclareInput() + } +} // DeclareInput() + +template +void HybridAggregationFactory:: + Build(Level& 
currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - template - void HybridAggregationFactory:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + *out << "Entering hybrid aggregation" << std::endl; - RCP out; - if(const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - *out << "Entering hybrid aggregation" << std::endl; + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); + // General problem informations are gathered from data stored in the problem matix. 
+ RCP graph = Get >(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const int numRanks = fineMap->getComm()->getSize(); - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); + out->setProcRankAndSize(graph->GetImportMap()->getComm()->getRank(), + graph->GetImportMap()->getComm()->getSize()); - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const int numRanks = fineMap->getComm()->getSize(); + // Build aggregates + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("HB"); - out->setProcRankAndSize(graph->GetImportMap()->getComm()->getRank(), - graph->GetImportMap()->getComm()->getSize()); + // construct aggStat information + const LO numRows = graph->GetNodeNumVertices(); + std::vector aggStat(numRows, READY); - // Build aggregates - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("HB"); + // Get aggregation type for region + std::string regionType; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + regionType = currentLevel.Get("aggregationRegionType", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + regionType = Get(currentLevel, "aggregationRegionType"); + } - // construct aggStat information - const LO numRows = graph->GetNodeNumVertices(); - std::vector aggStat(numRows, READY); + int numDimensions = 0; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. 
+ numDimensions = Get(currentLevel, "numDimensions"); + } - // Get aggregation type for region - std::string regionType; - if(currentLevel.GetLevelID() == 0) { + // Get the coarsening rate (potentially used for both structured and uncoupled aggregation if interface) + std::string coarseningRate = pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation& e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + algos_.clear(); + LO numNonAggregatedNodes = numRows; + if (regionType == "structured") { + // Add AggregationStructuredAlgorithm + algos_.push_back(rcp(new AggregationStructuredAlgorithm(graphFact))); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to + // obtain a nodeMap. + const int interpolationOrder = pL.get("aggregation: coarsening order"); + Array lFineNodesPerDir(3); + if (currentLevel.GetLevelID() == 0) { // On level 0, data is provided by applications and has no associated factory. - regionType = currentLevel.Get("aggregationRegionType", NoFactory::get()); + lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); } else { // On level > 0, data is provided directly by generating factories. - regionType = Get< std::string >(currentLevel, "aggregationRegionType"); + lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); } - int numDimensions = 0; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. 
- numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. - numDimensions = Get(currentLevel, "numDimensions"); + // Set lFineNodesPerDir to 1 for directions beyond numDimensions + for (int dim = numDimensions; dim < 3; ++dim) { + lFineNodesPerDir[dim] = 1; } - // Get the coarsening rate (potentially used for both structured and uncoupled aggregation if interface) - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " - << std::endl; - throw e; - } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); - - algos_.clear(); - LO numNonAggregatedNodes = numRows; - if (regionType == "structured") { - // Add AggregationStructuredAlgorithm - algos_.push_back(rcp(new AggregationStructuredAlgorithm(graphFact))); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - Array lFineNodesPerDir(3); - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. 
- lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - } - - // Set lFineNodesPerDir to 1 for directions beyond numDimensions - for(int dim = numDimensions; dim < 3; ++dim) { - lFineNodesPerDir[dim] = 1; - } - - // Now that we have extracted info from the level, create the IndexManager - RCP > geoData; - geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), + // Now that we have extracted info from the level, create the IndexManager + RCP > geoData; + geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), false, numDimensions, interpolationOrder, @@ -345,241 +338,245 @@ namespace MueLu { lFineNodesPerDir, coarseRate, false)); - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - - aggregates->SetIndexManager(geoData); - aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); - - Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); - - } // end structured aggregation setup - - if (regionType == "uncoupled"){ - // Add unstructred aggregation phases - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); - if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) 
algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); - - *out << " Build interface aggregates" << std::endl; - // interface - if (pL.get("aggregation: use interface aggregation") == true) { - BuildInterfaceAggregates(currentLevel, aggregates, aggStat, numNonAggregatedNodes, - coarseRate); - } + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() != static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + + aggregates->SetIndexManager(geoData); + aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); + + Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); + + } // end structured aggregation setup + + if (regionType == "uncoupled") { + // Add unstructred aggregation phases + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact))); + + *out << " Build interface aggregates" << std::endl; + // interface + if (pL.get("aggregation: use interface aggregation") == true) { + BuildInterfaceAggregates(currentLevel, aggregates, aggStat, numNonAggregatedNodes, + coarseRate); + } - *out << "Treat Dirichlet BC" << std::endl; - // 
Dirichlet boundary - ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); - if (dirichletBoundaryMap != Teuchos::null) - for (LO i = 0; i < numRows; i++) - if (dirichletBoundaryMap[i] == true) - aggStat[i] = BOUNDARY; - - // OnePt aggregation - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } - } + *out << "Treat Dirichlet BC" << std::endl; + // Dirichlet boundary + ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); + if (dirichletBoundaryMap != Teuchos::null) + for (LO i = 0; i < numRows; i++) + if (dirichletBoundaryMap[i] == true) + aggStat[i] = BOUNDARY; - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStat[i] = ONEPT; - } + // OnePt aggregation + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); } + } - // Create a fake lCoarseNodesPerDir for 
CoordinatesTranferFactory - Array lCoarseNodesPerDir(3,-1); - Set(currentLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir); - } // end uncoupled aggregation setup - - aggregates->AggregatesCrossProcessors(false); // No coupled aggregation + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase; + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStat[i] = ONEPT; + } + } - *out << "Run all the algorithms on the local rank" << std::endl; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); - *out << regionType <<" | Executing phase " << a << std::endl; + // Create a fake lCoarseNodesPerDir for CoordinatesTranferFactory + Array lCoarseNodesPerDir(3, -1); + Set(currentLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir); + } // end uncoupled aggregation setup - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - *out << regionType <<" | Done Executing phase " << a << std::endl; - } + aggregates->AggregatesCrossProcessors(false); // No coupled aggregation - *out << "Compute statistics on aggregates" << std::endl; - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + *out << "Run all the algorithms on the local rank" << std::endl; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); + *out << regionType << " | Executing phase " << a << std::endl; - 
Set(currentLevel, "Aggregates", aggregates); - Set(currentLevel, "numDimensions", numDimensions); - Set(currentLevel, "aggregationRegionTypeCoarse", regionType); + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + *out << regionType << " | Done Executing phase " << a << std::endl; + } - GetOStream(Statistics1) << aggregates->description() << std::endl; - *out << "HybridAggregation done!" << std::endl; + *out << "Compute statistics on aggregates" << std::endl; + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + + Set(currentLevel, "Aggregates", aggregates); + Set(currentLevel, "numDimensions", numDimensions); + Set(currentLevel, "aggregationRegionTypeCoarse", regionType); + + GetOStream(Statistics1) << aggregates->description() << std::endl; + *out << "HybridAggregation done!" << std::endl; +} + +template +void HybridAggregationFactory:: + BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, + std::vector& aggStat, LO& numNonAggregatedNodes, + Array coarseRate) const { + FactoryMonitor m(*this, "BuildInterfaceAggregates", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - template - void HybridAggregationFactory:: - BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, - std::vector& aggStat, LO& numNonAggregatedNodes, - Array coarseRate) const { - FactoryMonitor m(*this, "BuildInterfaceAggregates", currentLevel); - - RCP out; - if(const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out 
= Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + // Extract and format input data for algo + if (coarseRate.size() == 1) { + coarseRate.resize(3, coarseRate[0]); + } + ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + Array interfacesDimensions = Get >(currentLevel, "interfacesDimensions"); + Array nodesOnInterfaces = Get >(currentLevel, "nodeOnInterface"); + const int numInterfaces = interfacesDimensions.size() / 3; + const int myRank = aggregates->GetMap()->getComm()->getRank(); + + // Create coarse level container to gather data on the fly + Array coarseInterfacesDimensions(interfacesDimensions.size()); + Array nodesOnCoarseInterfaces; + { // Scoping the temporary variables... + LO endRate, totalNumCoarseNodes = 0, numCoarseNodes; + for (int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { + numCoarseNodes = 1; + for (int dim = 0; dim < 3; ++dim) { + endRate = (interfacesDimensions[3 * interfaceIdx + dim] - 1) % coarseRate[dim]; + if (interfacesDimensions[3 * interfaceIdx + dim] == 1) { + coarseInterfacesDimensions[3 * interfaceIdx + dim] = 1; + } else { + coarseInterfacesDimensions[3 * interfaceIdx + dim] = (interfacesDimensions[3 * interfaceIdx + dim] - 1) / coarseRate[dim] + 2; + if (endRate == 0) { + coarseInterfacesDimensions[3 * interfaceIdx + dim]--; + } + } + numCoarseNodes *= coarseInterfacesDimensions[3 * interfaceIdx + dim]; + } + totalNumCoarseNodes += numCoarseNodes; } + nodesOnCoarseInterfaces.resize(totalNumCoarseNodes, -1); + } - // Extract and format input data for algo - if(coarseRate.size() == 1) {coarseRate.resize(3, coarseRate[0]);} - ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - Array interfacesDimensions = Get >(currentLevel, "interfacesDimensions"); - Array nodesOnInterfaces = Get >(currentLevel, 
"nodeOnInterface"); - const int numInterfaces = interfacesDimensions.size() / 3; - const int myRank = aggregates->GetMap()->getComm()->getRank(); - - // Create coarse level container to gather data on the fly - Array coarseInterfacesDimensions(interfacesDimensions.size()); - Array nodesOnCoarseInterfaces; - { // Scoping the temporary variables... - LO endRate, totalNumCoarseNodes = 0, numCoarseNodes; - for(int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { - numCoarseNodes = 1; - for(int dim = 0; dim < 3; ++dim) { - endRate = (interfacesDimensions[3*interfaceIdx + dim] - 1) % coarseRate[dim]; - if(interfacesDimensions[3*interfaceIdx + dim] == 1) { - coarseInterfacesDimensions[3*interfaceIdx + dim] = 1; - } else { - coarseInterfacesDimensions[3*interfaceIdx + dim] - = (interfacesDimensions[3*interfaceIdx+dim]-1) / coarseRate[dim] + 2; - if(endRate==0){ coarseInterfacesDimensions[3*interfaceIdx + dim]--;} - } - numCoarseNodes *= coarseInterfacesDimensions[3*interfaceIdx + dim]; + Array endRate(3); + LO interfaceOffset = 0, aggregateCount = 0, coarseNodeCount = 0; + for (int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { + ArrayView fineNodesPerDim = interfacesDimensions(3 * interfaceIdx, 3); + ArrayView coarseNodesPerDim = coarseInterfacesDimensions(3 * interfaceIdx, 3); + LO numInterfaceNodes = 1, numCoarseNodes = 1; + for (int dim = 0; dim < 3; ++dim) { + numInterfaceNodes *= fineNodesPerDim[dim]; + numCoarseNodes *= coarseNodesPerDim[dim]; + endRate[dim] = (fineNodesPerDim[dim] - 1) % coarseRate[dim]; + } + ArrayView interfaceNodes = nodesOnInterfaces(interfaceOffset, numInterfaceNodes); + + interfaceOffset += numInterfaceNodes; + + LO rem, rate, fineNodeIdx; + Array nodeIJK(3), coarseIJK(3), rootIJK(3); + // First find treat coarse nodes as they generate the aggregate IDs + // and they might be repeated on multiple interfaces (think corners and edges). 
+ for (LO coarseNodeIdx = 0; coarseNodeIdx < numCoarseNodes; ++coarseNodeIdx) { + coarseIJK[2] = coarseNodeIdx / (coarseNodesPerDim[0] * coarseNodesPerDim[1]); + rem = coarseNodeIdx % (coarseNodesPerDim[0] * coarseNodesPerDim[1]); + coarseIJK[1] = rem / coarseNodesPerDim[0]; + coarseIJK[0] = rem % coarseNodesPerDim[0]; + + for (LO dim = 0; dim < 3; ++dim) { + if (coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { + nodeIJK[dim] = fineNodesPerDim[dim] - 1; + } else { + nodeIJK[dim] = coarseIJK[dim] * coarseRate[dim]; } - totalNumCoarseNodes += numCoarseNodes; } - nodesOnCoarseInterfaces.resize(totalNumCoarseNodes, -1); + fineNodeIdx = (nodeIJK[2] * fineNodesPerDim[1] + nodeIJK[1]) * fineNodesPerDim[0] + nodeIJK[0]; + + if (aggStat[interfaceNodes[fineNodeIdx]] == READY) { + vertex2AggId[interfaceNodes[fineNodeIdx]] = aggregateCount; + procWinner[interfaceNodes[fineNodeIdx]] = myRank; + aggStat[interfaceNodes[fineNodeIdx]] = AGGREGATED; + ++aggregateCount; + --numNonAggregatedNodes; + } + nodesOnCoarseInterfaces[coarseNodeCount] = vertex2AggId[interfaceNodes[fineNodeIdx]]; + ++coarseNodeCount; } - Array endRate(3); - LO interfaceOffset = 0, aggregateCount = 0, coarseNodeCount = 0; - for(int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { - ArrayView fineNodesPerDim = interfacesDimensions(3*interfaceIdx, 3); - ArrayView coarseNodesPerDim = coarseInterfacesDimensions(3*interfaceIdx, 3); - LO numInterfaceNodes = 1, numCoarseNodes = 1; - for(int dim = 0; dim < 3; ++dim) { - numInterfaceNodes *= fineNodesPerDim[dim]; - numCoarseNodes *= coarseNodesPerDim[dim]; - endRate[dim] = (fineNodesPerDim[dim]-1) % coarseRate[dim]; + // Now loop over all the node on the interface + // skip the coarse nodes as they are already aggregated + // and find the appropriate aggregate ID for the fine nodes. + for (LO nodeIdx = 0; nodeIdx < numInterfaceNodes; ++nodeIdx) { + // If the node is already aggregated skip it! 
+ if (aggStat[interfaceNodes[nodeIdx]] == AGGREGATED) { + continue; } - ArrayView interfaceNodes = nodesOnInterfaces(interfaceOffset, numInterfaceNodes); - - interfaceOffset += numInterfaceNodes; - - LO rem, rate, fineNodeIdx; - Array nodeIJK(3), coarseIJK(3), rootIJK(3); - // First find treat coarse nodes as they generate the aggregate IDs - // and they might be repeated on multiple interfaces (think corners and edges). - for(LO coarseNodeIdx = 0; coarseNodeIdx < numCoarseNodes; ++coarseNodeIdx) { - coarseIJK[2] = coarseNodeIdx / (coarseNodesPerDim[0]*coarseNodesPerDim[1]); - rem = coarseNodeIdx % (coarseNodesPerDim[0]*coarseNodesPerDim[1]); - coarseIJK[1] = rem / coarseNodesPerDim[0]; - coarseIJK[0] = rem % coarseNodesPerDim[0]; - - for(LO dim = 0; dim < 3; ++dim) { - if(coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { - nodeIJK[dim] = fineNodesPerDim[dim] - 1; - } else { - nodeIJK[dim] = coarseIJK[dim]*coarseRate[dim]; - } + + nodeIJK[2] = nodeIdx / (fineNodesPerDim[0] * fineNodesPerDim[1]); + rem = nodeIdx % (fineNodesPerDim[0] * fineNodesPerDim[1]); + nodeIJK[1] = rem / fineNodesPerDim[0]; + nodeIJK[0] = rem % fineNodesPerDim[0]; + + for (int dim = 0; dim < 3; ++dim) { + coarseIJK[dim] = nodeIJK[dim] / coarseRate[dim]; + rem = nodeIJK[dim] % coarseRate[dim]; + if (nodeIJK[dim] < fineNodesPerDim[dim] - endRate[dim]) { + rate = coarseRate[dim]; + } else { + rate = endRate[dim]; } - fineNodeIdx = (nodeIJK[2]*fineNodesPerDim[1] + nodeIJK[1])*fineNodesPerDim[0] + nodeIJK[0]; - - if(aggStat[interfaceNodes[fineNodeIdx]] == READY) { - vertex2AggId[interfaceNodes[fineNodeIdx]] = aggregateCount; - procWinner[interfaceNodes[fineNodeIdx]] = myRank; - aggStat[interfaceNodes[fineNodeIdx]] = AGGREGATED; - ++aggregateCount; - --numNonAggregatedNodes; + if (rem > (rate / 2)) { + ++coarseIJK[dim]; } - nodesOnCoarseInterfaces[coarseNodeCount] = vertex2AggId[interfaceNodes[fineNodeIdx]]; - ++coarseNodeCount; } - // Now loop over all the node on the interface - // skip the coarse 
nodes as they are already aggregated - // and find the appropriate aggregate ID for the fine nodes. - for(LO nodeIdx = 0; nodeIdx < numInterfaceNodes; ++nodeIdx) { - - // If the node is already aggregated skip it! - if(aggStat[interfaceNodes[nodeIdx]] == AGGREGATED) {continue;} - - nodeIJK[2] = nodeIdx / (fineNodesPerDim[0]*fineNodesPerDim[1]); - rem = nodeIdx % (fineNodesPerDim[0]*fineNodesPerDim[1]); - nodeIJK[1] = rem / fineNodesPerDim[0]; - nodeIJK[0] = rem % fineNodesPerDim[0]; - - for(int dim = 0; dim < 3; ++dim) { - coarseIJK[dim] = nodeIJK[dim] / coarseRate[dim]; - rem = nodeIJK[dim] % coarseRate[dim]; - if(nodeIJK[dim] < fineNodesPerDim[dim] - endRate[dim]) { - rate = coarseRate[dim]; - } else { - rate = endRate[dim]; - } - if(rem > (rate / 2)) {++coarseIJK[dim];} - } - - for(LO dim = 0; dim < 3; ++dim) { - if(coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { - nodeIJK[dim] = fineNodesPerDim[dim] - 1; - } else { - nodeIJK[dim] = coarseIJK[dim]*coarseRate[dim]; - } + for (LO dim = 0; dim < 3; ++dim) { + if (coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { + nodeIJK[dim] = fineNodesPerDim[dim] - 1; + } else { + nodeIJK[dim] = coarseIJK[dim] * coarseRate[dim]; } - fineNodeIdx = (nodeIJK[2]*fineNodesPerDim[1] + nodeIJK[1])*fineNodesPerDim[0] + nodeIJK[0]; - - vertex2AggId[interfaceNodes[nodeIdx]] = vertex2AggId[interfaceNodes[fineNodeIdx]]; - procWinner[interfaceNodes[nodeIdx]] = myRank; - aggStat[interfaceNodes[nodeIdx]] = AGGREGATED; - --numNonAggregatedNodes; - } // Loop over interface nodes - } // Loop over the interfaces + } + fineNodeIdx = (nodeIJK[2] * fineNodesPerDim[1] + nodeIJK[1]) * fineNodesPerDim[0] + nodeIJK[0]; - // Update aggregates information before subsequent aggregation algorithms - aggregates->SetNumAggregates(aggregateCount); + vertex2AggId[interfaceNodes[nodeIdx]] = vertex2AggId[interfaceNodes[fineNodeIdx]]; + procWinner[interfaceNodes[nodeIdx]] = myRank; + aggStat[interfaceNodes[nodeIdx]] = AGGREGATED; + --numNonAggregatedNodes; + } // 
Loop over interface nodes + } // Loop over the interfaces - // Set coarse data for next level - Set(currentLevel, "coarseInterfacesDimensions", coarseInterfacesDimensions); - Set(currentLevel, "nodeOnCoarseInterface", nodesOnCoarseInterfaces); + // Update aggregates information before subsequent aggregation algorithms + aggregates->SetNumAggregates(aggregateCount); - } // BuildInterfaceAggregates() + // Set coarse data for next level + Set(currentLevel, "coarseInterfacesDimensions", coarseInterfacesDimensions); + Set(currentLevel, "nodeOnCoarseInterface", nodesOnCoarseInterfaces); -} //namespace MueLu +} // BuildInterfaceAggregates() +} // namespace MueLu #endif /* MUELU_HYBRIDAGGREGATIONFACTORY_DEF_HPP */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp index 7cca1e45f86e..b6daf462bb42 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp @@ -58,76 +58,74 @@ namespace MueLu { - /*! - @class AmalgamationFactory - @brief AmalgamationFactory for subblocks of strided map based amalgamation data +/*! + @class AmalgamationFactory + @brief AmalgamationFactory for subblocks of strided map based amalgamation data - Class generates unamalgamation information using matrix A with strided maps. - It stores the output information within an AmalgamationInfo object as "UnAmalgamationInfo". - This object contains + Class generates unamalgamation information using matrix A with strided maps. + It stores the output information within an AmalgamationInfo object as "UnAmalgamationInfo". + This object contains - \li \c nodegid2dofgids_ a map of all node ids of which the current proc has corresponding DOF gids (used by \c TentativePFactory). 
- \li \c gNodeIds vector of all node ids on the current proc (may be less than nodegid2dofgids_.size()). These nodes are stored on the current proc. + \li \c nodegid2dofgids_ a map of all node ids of which the current proc has corresponding DOF gids (used by \c TentativePFactory). + \li \c gNodeIds vector of all node ids on the current proc (may be less than nodegid2dofgids_.size()). These nodes are stored on the current proc. - */ +*/ - template - class AmalgamationFactory : public SingleLevelFactoryBase { +template +class AmalgamationFactory : public SingleLevelFactoryBase { #undef MUELU_AMALGAMATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + AmalgamationFactory() = default; - //! Constructor - AmalgamationFactory() = default; + //! Destructor + virtual ~AmalgamationFactory() = default; - //! Destructor - virtual ~AmalgamationFactory() = default; + RCP GetValidParameterList() const override; - RCP GetValidParameterList() const override; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const override; - void DeclareInput(Level ¤tLevel) const override; + //@} - //@} + void Build(Level& currentLevel) const override; - void Build(Level ¤tLevel) const override; + /*! 
@brief Translate global (row/column) id to global amalgamation block id + * + * @note Assume that the node map has the same \c indexBase as the dof map + * + * @param gid (GlobalOrdinal): input global id (row gid or column gid) + * @param blockSize (LocalOrdinal): block size (needed for constant block size) + * @param offset (GlobalOrdinal): global offset for dofs (stored in strided map, default = 0) + * @param indexBase (GlobalOrdinal): indexBase for DOF map (and node map, default = 0) + */ + static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset /*= 0*/, + const GlobalOrdinal indexBase /* = 0*/); - /*! @brief Translate global (row/column) id to global amalgamation block id - * - * @note Assume that the node map has the same \c indexBase as the dof map - * - * @param gid (GlobalOrdinal): input global id (row gid or column gid) - * @param blockSize (LocalOrdinal): block size (needed for constant block size) - * @param offset (GlobalOrdinal): global offset for dofs (stored in strided map, default = 0) - * @param indexBase (GlobalOrdinal): indexBase for DOF map (and node map, default = 0) - */ - static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset /*= 0*/, - const GlobalOrdinal indexBase/* = 0*/); + /*! @brief Method to create merged map for systems of PDEs. + * + * @param sourceMap (const Map&): source map with dofs which shall be amalgamated to a node map + * @param A (const Matrix&): operator A (matrix) with striding information (if available) + * @param amalgamatedMap (const Map&): amalgamated node based map + * @param translation (Array&): array storing local node ids given local dof ids (needed in CoalesceDropFactory) + */ + static void AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation); - /*! @brief Method to create merged map for systems of PDEs. 
- * - * @param sourceMap (const Map&): source map with dofs which shall be amalgamated to a node map - * @param A (const Matrix&): operator A (matrix) with striding information (if available) - * @param amalgamatedMap (const Map&): amalgamated node based map - * @param translation (Array&): array storing local node ids given local dof ids (needed in CoalesceDropFactory) - */ - static void AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation); +}; // class AmalgamationFactory - - }; //class AmalgamationFactory - -} //namespace MueLu +} // namespace MueLu #define MUELU_AMALGAMATIONFACTORY_SHORT -#endif // MUELU_AMALGAMATIONFACTORY_DECL_HPP +#endif // MUELU_AMALGAMATIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp index 60baf9efe0fe..b8dbfb162304 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp @@ -56,186 +56,182 @@ namespace MueLu { - template - RCP AmalgamationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - return validParamList; - } - - template - void AmalgamationFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); // sub-block from blocked A - } - - template - void AmalgamationFactory::Build(Level ¤tLevel) const - { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP A = Get< RCP >(currentLevel, "A"); - - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. - fullblocksize is the number of storage blocks that must kept together during the amalgamation process. 
- - Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: - - numPDEs = fullblocksize * storageblocksize. - - If numPDEs==1 - Matrix is point storage (classical CRS storage). storageblocksize=1 and fullblocksize=1 - No other values makes sense. - - If numPDEs>1 - If matrix uses point storage, then storageblocksize=1 and fullblockssize=numPDEs. - If matrix uses block storage, with block size of n, then storageblocksize=n, and fullblocksize=numPDEs/n. - Thus far, only storageblocksize=numPDEs and fullblocksize=1 has been tested. - */ - - - LO fullblocksize = 1; // block dim for fixed size blocks - GO offset = 0; // global offset of dof gids - LO blockid = -1; // block id in strided map - LO nStridedOffset = 0; // DOF offset for strided block id "blockid" (default = 0) - LO stridedblocksize = fullblocksize; // size of strided block id "blockid" (default = fullblocksize, only if blockid!=-1 stridedblocksize <= fullblocksize) - LO storageblocksize = A->GetStorageBlockSize(); - // GO indexBase = A->getRowMap()->getIndexBase(); // index base for maps (unused) - - // 1) check for blocking/striding information - - if (A->IsView("stridedMaps") && Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // NOTE: "stridedMaps are always non-overlapping (correspond to range and domain maps!) 
- RCP stridedRowMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); - TEUCHOS_TEST_FOR_EXCEPTION(stridedRowMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed."); - fullblocksize = stridedRowMap->getFixedBlockSize(); - offset = stridedRowMap->getOffset(); - blockid = stridedRowMap->getStridedBlockId(); - - if (blockid > -1) { - std::vector stridingInfo = stridedRowMap->getStridingData(); - for (size_t j = 0; j < Teuchos::as(blockid); j++) - nStridedOffset += stridingInfo[j]; - stridedblocksize = Teuchos::as(stridingInfo[blockid]); - - } else { - stridedblocksize = fullblocksize; - } - // Correct for the storageblocksize - // NOTE: Before this point fullblocksize is actually numPDEs - TEUCHOS_TEST_FOR_EXCEPTION(fullblocksize % storageblocksize != 0,Exceptions::RuntimeError,"AmalgamationFactory: fullblocksize needs to be a multiple of A->GetStorageBlockSize()"); - fullblocksize /= storageblocksize; - stridedblocksize /= storageblocksize; - - oldView = A->SwitchToView(oldView); - GetOStream(Runtime1) << "AmalagamationFactory::Build():" << " found fullblocksize=" << fullblocksize << " and stridedblocksize=" << stridedblocksize << " from strided maps. offset=" << offset << std::endl; +template +RCP AmalgamationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + return validParamList; +} + +template +void AmalgamationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); // sub-block from blocked A +} + +template +void AmalgamationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP A = Get >(currentLevel, "A"); + + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. + fullblocksize is the number of storage blocks that must kept together during the amalgamation process. 
+ + Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: + + numPDEs = fullblocksize * storageblocksize. + + If numPDEs==1 + Matrix is point storage (classical CRS storage). storageblocksize=1 and fullblocksize=1 + No other values makes sense. + + If numPDEs>1 + If matrix uses point storage, then storageblocksize=1 and fullblockssize=numPDEs. + If matrix uses block storage, with block size of n, then storageblocksize=n, and fullblocksize=numPDEs/n. + Thus far, only storageblocksize=numPDEs and fullblocksize=1 has been tested. + */ + + LO fullblocksize = 1; // block dim for fixed size blocks + GO offset = 0; // global offset of dof gids + LO blockid = -1; // block id in strided map + LO nStridedOffset = 0; // DOF offset for strided block id "blockid" (default = 0) + LO stridedblocksize = fullblocksize; // size of strided block id "blockid" (default = fullblocksize, only if blockid!=-1 stridedblocksize <= fullblocksize) + LO storageblocksize = A->GetStorageBlockSize(); + // GO indexBase = A->getRowMap()->getIndexBase(); // index base for maps (unused) + + // 1) check for blocking/striding information + + if (A->IsView("stridedMaps") && Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { + Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // NOTE: "stridedMaps are always non-overlapping (correspond to range and domain maps!) 
+ RCP stridedRowMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); + TEUCHOS_TEST_FOR_EXCEPTION(stridedRowMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed."); + fullblocksize = stridedRowMap->getFixedBlockSize(); + offset = stridedRowMap->getOffset(); + blockid = stridedRowMap->getStridedBlockId(); + + if (blockid > -1) { + std::vector stridingInfo = stridedRowMap->getStridingData(); + for (size_t j = 0; j < Teuchos::as(blockid); j++) + nStridedOffset += stridingInfo[j]; + stridedblocksize = Teuchos::as(stridingInfo[blockid]); } else { - GetOStream(Warnings0) << "AmalagamationFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; - } - - - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. They are only necessary for the CoalesceDropFactory if - // fullblocksize > 1 - RCP uniqueMap, nonUniqueMap; - RCP amalgamationData; - RCP > rowTranslation = Teuchos::null; - RCP > colTranslation = Teuchos::null; - - if (fullblocksize > 1) { - // mfh 14 Apr 2015: These need to have different names than - // rowTranslation and colTranslation, in order to avoid - // shadowing warnings (-Wshadow with GCC). Alternately, it - // looks like you could just assign to the existing variables in - // this scope, rather than creating new ones. 
- RCP > theRowTranslation = rcp(new Array); - RCP > theColTranslation = rcp(new Array); - AmalgamateMap(*(A->getRowMap()), *A, uniqueMap, *theRowTranslation); - AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, *theColTranslation); - - amalgamationData = rcp(new AmalgamationInfo(theRowTranslation, - theColTranslation, - uniqueMap, - nonUniqueMap, - A->getColMap(), - fullblocksize, - offset, - blockid, - nStridedOffset, - stridedblocksize) ); - } else { - amalgamationData = rcp(new AmalgamationInfo(rowTranslation, // Teuchos::null - colTranslation, // Teuchos::null - A->getRowMap(), // unique map of graph - A->getColMap(), // non-unique map of graph - A->getColMap(), // column map of A - fullblocksize, - offset, - blockid, - nStridedOffset, - stridedblocksize) ); + stridedblocksize = fullblocksize; } + // Correct for the storageblocksize + // NOTE: Before this point fullblocksize is actually numPDEs + TEUCHOS_TEST_FOR_EXCEPTION(fullblocksize % storageblocksize != 0, Exceptions::RuntimeError, "AmalgamationFactory: fullblocksize needs to be a multiple of A->GetStorageBlockSize()"); + fullblocksize /= storageblocksize; + stridedblocksize /= storageblocksize; + + oldView = A->SwitchToView(oldView); + GetOStream(Runtime1) << "AmalagamationFactory::Build():" + << " found fullblocksize=" << fullblocksize << " and stridedblocksize=" << stridedblocksize << " from strided maps. offset=" << offset << std::endl; + + } else { + GetOStream(Warnings0) << "AmalagamationFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; + } - // store (un)amalgamation information on current level - Set(currentLevel, "UnAmalgamationInfo", amalgamationData); + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. 
They are only necessary for the CoalesceDropFactory if + // fullblocksize > 1 + RCP uniqueMap, nonUniqueMap; + RCP amalgamationData; + RCP > rowTranslation = Teuchos::null; + RCP > colTranslation = Teuchos::null; + + if (fullblocksize > 1) { + // mfh 14 Apr 2015: These need to have different names than + // rowTranslation and colTranslation, in order to avoid + // shadowing warnings (-Wshadow with GCC). Alternately, it + // looks like you could just assign to the existing variables in + // this scope, rather than creating new ones. + RCP > theRowTranslation = rcp(new Array); + RCP > theColTranslation = rcp(new Array); + AmalgamateMap(*(A->getRowMap()), *A, uniqueMap, *theRowTranslation); + AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, *theColTranslation); + + amalgamationData = rcp(new AmalgamationInfo(theRowTranslation, + theColTranslation, + uniqueMap, + nonUniqueMap, + A->getColMap(), + fullblocksize, + offset, + blockid, + nStridedOffset, + stridedblocksize)); + } else { + amalgamationData = rcp(new AmalgamationInfo(rowTranslation, // Teuchos::null + colTranslation, // Teuchos::null + A->getRowMap(), // unique map of graph + A->getColMap(), // non-unique map of graph + A->getColMap(), // column map of A + fullblocksize, + offset, + blockid, + nStridedOffset, + stridedblocksize)); } - template - void AmalgamationFactory::AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation) { - typedef typename ArrayView::size_type size_type; - typedef std::unordered_map container; - - GO indexBase = sourceMap.getIndexBase(); - ArrayView elementAList = sourceMap.getLocalElementList(); - size_type numElements = elementAList.size(); - container filter; - - GO offset = 0; - LO blkSize = A.GetFixedBlockSize() / A.GetStorageBlockSize(); - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, 
Exceptions::RuntimeError, "Map is not of type StridedMap"); - offset = strMap->getOffset(); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - } + // store (un)amalgamation information on current level + Set(currentLevel, "UnAmalgamationInfo", amalgamationData); +} + +template +void AmalgamationFactory::AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation) { + typedef typename ArrayView::size_type size_type; + typedef std::unordered_map container; + + GO indexBase = sourceMap.getIndexBase(); + ArrayView elementAList = sourceMap.getLocalElementList(); + size_type numElements = elementAList.size(); + container filter; + + GO offset = 0; + LO blkSize = A.GetFixedBlockSize() / A.GetStorageBlockSize(); + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + offset = strMap->getOffset(); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + } - Array elementList(numElements); - translation.resize(numElements); + Array elementList(numElements); + translation.resize(numElements); - size_type numRows = 0; - for (size_type id = 0; id < numElements; id++) { - GO dofID = elementAList[id]; - GO nodeID = AmalgamationFactory::DOFGid2NodeId(dofID, blkSize, offset, indexBase); + size_type numRows = 0; + for (size_type id = 0; id < numElements; id++) { + GO dofID = elementAList[id]; + GO nodeID = AmalgamationFactory::DOFGid2NodeId(dofID, blkSize, offset, indexBase); - typename container::iterator it = filter.find(nodeID); - if (it == filter.end()) { - filter[nodeID] = numRows; + typename container::iterator it = filter.find(nodeID); + if (it == filter.end()) { + filter[nodeID] = numRows; - translation[id] = numRows; - elementList[numRows] = nodeID; + translation[id] = numRows; + elementList[numRows] = nodeID; - numRows++; + numRows++; - } 
else { - translation[id] = it->second; - } + } else { + translation[id] = it->second; } - elementList.resize(numRows); - - amalgamatedMap = MapFactory::Build(sourceMap.lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, sourceMap.getComm()); - } + elementList.resize(numRows); - template - const GlobalOrdinal AmalgamationFactory::DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, - const GlobalOrdinal offset, const GlobalOrdinal indexBase) { - GlobalOrdinal globalblockid = ((GlobalOrdinal) gid - offset - indexBase) / blockSize + indexBase; - return globalblockid; - } + amalgamatedMap = MapFactory::Build(sourceMap.lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, sourceMap.getComm()); +} -} //namespace MueLu +template +const GlobalOrdinal AmalgamationFactory::DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, + const GlobalOrdinal offset, const GlobalOrdinal indexBase) { + GlobalOrdinal globalblockid = ((GlobalOrdinal)gid - offset - indexBase) / blockSize + indexBase; + return globalblockid; +} -#endif /* MUELU_SUBBLOCKUNAMALGAMATIONFACTORY_DEF_HPP */ +} // namespace MueLu +#endif /* MUELU_SUBBLOCKUNAMALGAMATIONFACTORY_DEF_HPP */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp index b14a14a1cc10..0d15843f3049 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp @@ -53,7 +53,7 @@ #ifndef MUELU_AMALGAMATIONINFO_DECL_HPP_ #define MUELU_AMALGAMATIONINFO_DECL_HPP_ -#include // global_size_t +#include // global_size_t #include #include #include @@ -75,164 +75,158 @@ namespace MueLu { current processor. That mapping is used for unamalgamation. 
*/ - template - class AmalgamationInfo - : public BaseClass { +template +class AmalgamationInfo + : public BaseClass { #undef MUELU_AMALGAMATIONINFO_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - /// Constructor - AmalgamationInfo(RCP > rowTranslation, - RCP > colTranslation, - RCP nodeRowMap, - RCP nodeColMap, - RCP const &columnMap, - LO fullblocksize, GO offset, LO blockid, LO nStridedOffset, LO stridedblocksize) : - rowTranslation_(rowTranslation), - colTranslation_(colTranslation), - nodeRowMap_(nodeRowMap), - nodeColMap_(nodeColMap), - columnMap_(columnMap), - fullblocksize_(fullblocksize), - offset_(offset), - blockid_(blockid), - nStridedOffset_(nStridedOffset), - stridedblocksize_(stridedblocksize), - indexBase_(columnMap->getIndexBase()) - {} - - /// Destructor - virtual ~AmalgamationInfo() {} - - /// Return a simple one-line description of this object. - std::string description() const { return "AmalgamationInfo"; } - - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - RCP getNodeRowMap() const { return nodeRowMap_; } //! < returns the node row map for the graph - RCP getNodeColMap() const { return nodeColMap_; } //! < returns the node column map for the graph - - /* @brief Translation arrays - * - * Returns translation arrays providing local node ids given local dof ids built from either - * the non-overlapping (unique) row map or the overlapping (non-unique) column map. - * The getColTranslation routine, e.g., is used for the MergeRows routine in CoalesceDropFactory. - */ - //@{ - RCP > getRowTranslation() const { return rowTranslation_; } - RCP > getColTranslation() const { return colTranslation_; } - //@} - - /*! 
@brief UnamalgamateAggregates - - Puts all dofs for aggregate \c i in aggToRowMap[\c i]. Also calculate aggregate sizes. - */ - void UnamalgamateAggregates(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; - void UnamalgamateAggregatesLO(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; - - /*! @brief ComputeUnamalgamatedImportDofMap - * build overlapping dof row map from aggregates needed for overlapping null space - */ - Teuchos::RCP< Xpetra::Map > ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const; - - private: - - void UnamalgamateAggregates(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, + public: + /// Constructor + AmalgamationInfo(RCP > rowTranslation, + RCP > colTranslation, + RCP nodeRowMap, + RCP nodeColMap, + RCP const& columnMap, + LO fullblocksize, GO offset, LO blockid, LO nStridedOffset, LO stridedblocksize) + : rowTranslation_(rowTranslation) + , colTranslation_(colTranslation) + , nodeRowMap_(nodeRowMap) + , nodeColMap_(nodeColMap) + , columnMap_(columnMap) + , fullblocksize_(fullblocksize) + , offset_(offset) + , blockid_(blockid) + , nStridedOffset_(nStridedOffset) + , stridedblocksize_(stridedblocksize) + , indexBase_(columnMap->getIndexBase()) {} + + /// Destructor + virtual ~AmalgamationInfo() {} + + /// Return a simple one-line description of this object. + std::string description() const { return "AmalgamationInfo"; } + + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + + RCP getNodeRowMap() const { return nodeRowMap_; } //! < returns the node row map for the graph + RCP getNodeColMap() const { return nodeColMap_; } //! 
< returns the node column map for the graph + + /* @brief Translation arrays + * + * Returns translation arrays providing local node ids given local dof ids built from either + * the non-overlapping (unique) row map or the overlapping (non-unique) column map. + * The getColTranslation routine, e.g., is used for the MergeRows routine in CoalesceDropFactory. + */ + //@{ + RCP > getRowTranslation() const { return rowTranslation_; } + RCP > getColTranslation() const { return colTranslation_; } + //@} + + /*! @brief UnamalgamateAggregates + + Puts all dofs for aggregate \c i in aggToRowMap[\c i]. Also calculate aggregate sizes. + */ + void UnamalgamateAggregates(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; + void UnamalgamateAggregatesLO(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; + + /*! @brief ComputeUnamalgamatedImportDofMap + * build overlapping dof row map from aggregates needed for overlapping null space + */ + Teuchos::RCP > ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const; + + private: + void UnamalgamateAggregates(const Teuchos::RCP& nodeMap, + const RCP& procWinnerVec, + const RCP& vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP& aggStart, + Teuchos::ArrayRCP& aggToRowMap) const; + + void UnamalgamateAggregatesLO(const Teuchos::RCP& nodeMap, + const RCP& procWinnerVec, + const RCP& vertex2AggIdVec, const GO numAggregates, Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const; - - void UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const; - - Teuchos::RCP< Xpetra::Map > ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const; - - public: - - /*! 
@brief ComputeGlobalDOF - * - * Return global dof id associated with global node id gNodeID and dof index k - * - * \note We assume that \c indexBase_ is valid for both the node and the dof map. - * - * @param (GO): global node id - * @param (LO): local dof index within node - * @return (GO): global dof id - */ - GO ComputeGlobalDOF(GO const &gNodeID, LO const &k=0) const; - - /*! @brief ComputeLocalDOF - * return locbal dof id associated with local node id lNodeID and dof index k - * - * @param (LO): local node id - * @param (LO): local dof index within node - * @return (LO): local dof id - */ - LO ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const; - - LO ComputeLocalNode(LocalOrdinal const &ldofID) const; - - /*! Access routines */ - - /// returns offset of global dof ids - GO GlobalOffset() { return offset_; } - - /// returns striding information - void GetStridingInformation(LO& fullBlockSize, LO& blockID, LO& stridingOffset, LO& stridedBlockSize, GO& indexBase) { - fullBlockSize = fullblocksize_; - blockID = blockid_; - stridingOffset = nStridedOffset_; - stridedBlockSize = stridedblocksize_; - indexBase = indexBase_; - } - - private: - - //! @name amalgamation information variables - //@{ - - //! Arrays containing local node ids given local dof ids - RCP > rowTranslation_; - RCP > colTranslation_; - - //! node row and column map of graph (built from row and column map of A) - RCP nodeRowMap_; - RCP nodeColMap_; - - /*! @brief DOF map (really column map of A) - - We keep a RCP on the column map to make sure that the map is still valid when it is used. - */ - RCP columnMap_; - - //@} - - //! @name Strided map information. - //@{ - LO fullblocksize_; - GO offset_; - LO blockid_; - LO nStridedOffset_; - LO stridedblocksize_; - GO indexBase_; - //@} - - }; - -} // namespace MueLu + Teuchos::ArrayRCP& aggToRowMap) const; + + Teuchos::RCP > ComputeUnamalgamatedImportDofMap(const Teuchos::RCP& nodeMap) const; + + public: + /*! 
@brief ComputeGlobalDOF + * + * Return global dof id associated with global node id gNodeID and dof index k + * + * \note We assume that \c indexBase_ is valid for both the node and the dof map. + * + * @param (GO): global node id + * @param (LO): local dof index within node + * @return (GO): global dof id + */ + GO ComputeGlobalDOF(GO const& gNodeID, LO const& k = 0) const; + + /*! @brief ComputeLocalDOF + * return locbal dof id associated with local node id lNodeID and dof index k + * + * @param (LO): local node id + * @param (LO): local dof index within node + * @return (LO): local dof id + */ + LO ComputeLocalDOF(LocalOrdinal const& lNodeID, LocalOrdinal const& k) const; + + LO ComputeLocalNode(LocalOrdinal const& ldofID) const; + + /*! Access routines */ + + /// returns offset of global dof ids + GO GlobalOffset() { return offset_; } + + /// returns striding information + void GetStridingInformation(LO& fullBlockSize, LO& blockID, LO& stridingOffset, LO& stridedBlockSize, GO& indexBase) { + fullBlockSize = fullblocksize_; + blockID = blockid_; + stridingOffset = nStridedOffset_; + stridedBlockSize = stridedblocksize_; + indexBase = indexBase_; + } + + private: + //! @name amalgamation information variables + //@{ + + //! Arrays containing local node ids given local dof ids + RCP > rowTranslation_; + RCP > colTranslation_; + + //! node row and column map of graph (built from row and column map of A) + RCP nodeRowMap_; + RCP nodeColMap_; + + /*! @brief DOF map (really column map of A) + + We keep a RCP on the column map to make sure that the map is still valid when it is used. + */ + RCP columnMap_; + + //@} + + //! @name Strided map information. 
+ //@{ + LO fullblocksize_; + GO offset_; + LO blockid_; + LO nStridedOffset_; + LO stridedblocksize_; + GO indexBase_; + //@} +}; + +} // namespace MueLu #define MUELU_AMALGAMATIONINFO_SHORT #endif /* MUELU_AMALGAMATIONINFO_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp index f46746657142..28ba61ec70b3 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp @@ -63,265 +63,257 @@ namespace MueLu { - template - void AmalgamationInfo:: - UnamalgamateAggregates(const Aggregates& aggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - UnamalgamateAggregates(aggregates.GetMap(), - aggregates.GetProcWinner(), - aggregates.GetVertex2AggId(), - aggregates.GetNumAggregates(), - aggStart, - aggToRowMap); - - } //UnamalgamateAggregates - - template - void AmalgamationInfo:: - UnamalgamateAggregates(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - int myPid = nodeMap->getComm()->getRank(); - Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); - Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); - Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); - const LO size = procWinner.size(); - - std::vector sizes(numAggregates); - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) - sizes[myAgg] += 1; - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - GO gnodeid = nodeGlobalElts[lnode]; - for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { - 
GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid,k); - if (columnMap_->isNodeGlobalElement(gDofIndex)) - sizes[myAgg] += 1; - } - } - } - } - aggStart = ArrayRCP(numAggregates+1,0); - aggStart[0] = Teuchos::ScalarTraits::zero(); - for (GO i=0; i +void AmalgamationInfo:: + UnamalgamateAggregates(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + UnamalgamateAggregates(aggregates.GetMap(), + aggregates.GetProcWinner(), + aggregates.GetVertex2AggId(), + aggregates.GetNumAggregates(), + aggStart, + aggToRowMap); + +} // UnamalgamateAggregates + +template +void AmalgamationInfo:: + UnamalgamateAggregates(const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + int myPid = nodeMap->getComm()->getRank(); + Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); + Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); + const LO size = procWinner.size(); + + std::vector sizes(numAggregates); + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) + sizes[myAgg] += 1; } - aggToRowMap = ArrayRCP(aggStart[numAggregates],0); - - // count, how many dofs have been recorded for each aggregate so far - Array numDofs(numAggregates, 0); // empty array with number of Dofs for each aggregate - - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - aggToRowMap[ aggStart[myAgg] + numDofs[myAgg] ] = ComputeGlobalDOF(nodeGlobalElts[lnode]); - ++(numDofs[myAgg]); - } - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - - if (procWinner[lnode] == myPid) { - GO gnodeid = 
nodeGlobalElts[lnode]; - for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { - GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid,k); - if (columnMap_->isNodeGlobalElement(gDofIndex)) { - aggToRowMap[ aggStart[myAgg] + numDofs[myAgg] ] = gDofIndex; - ++(numDofs[myAgg]); - } - } + } else { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) { + GO gnodeid = nodeGlobalElts[lnode]; + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid, k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) + sizes[myAgg] += 1; } } } - // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - - } //UnamalgamateAggregates - - template - void AmalgamationInfo:: - UnamalgamateAggregatesLO(const Aggregates& aggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - UnamalgamateAggregatesLO(aggregates.GetMap(), - aggregates.GetProcWinner(), - aggregates.GetVertex2AggId(), - aggregates.GetNumAggregates(), - aggStart, - aggToRowMap); } + aggStart = ArrayRCP(numAggregates + 1, 0); + aggStart[0] = Teuchos::ScalarTraits::zero(); + for (GO i = 0; i < numAggregates; ++i) { + aggStart[i + 1] = aggStart[i] + sizes[i]; + } + aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); - template - void AmalgamationInfo:: - UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - int myPid = nodeMap->getComm()->getRank(); - Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); - - Teuchos::ArrayRCP procWinner = procWinnerVec ->getDataNonConst(0); - Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); - - - // FIXME: Do we need to compute size here? Or can we use existing? 
- const LO size = procWinner.size(); - - std::vector sizes(numAggregates); - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; lnode++) - if (procWinner[lnode] == myPid) - sizes[vertex2AggId[lnode]]++; - } else { - for (LO lnode = 0; lnode < size; lnode++) - if (procWinner[lnode] == myPid) { - GO nodeGID = nodeGlobalElts[lnode]; + // count, how many dofs have been recorded for each aggregate so far + Array numDofs(numAggregates, 0); // empty array with number of Dofs for each aggregate - for (LO k = 0; k < stridedblocksize_; k++) { - GO GID = ComputeGlobalDOF(nodeGID, k); - if (columnMap_->isNodeGlobalElement(GID)) - sizes[vertex2AggId[lnode]]++; - } - } + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = ComputeGlobalDOF(nodeGlobalElts[lnode]); + ++(numDofs[myAgg]); + } } + } else { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; - aggStart = ArrayRCP(numAggregates+1); // FIXME: useless initialization with zeros - aggStart[0] = 0; - for (GO i = 0; i < numAggregates; i++) - aggStart[i+1] = aggStart[i] + sizes[i]; - - aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); - - // count, how many dofs have been recorded for each aggregate so far - Array numDofs(numAggregates, 0); // empty array with number of DOFs for each aggregate - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) - if (procWinner[lnode] == myPid) { - LO myAgg = vertex2AggId[lnode]; - aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode; - numDofs[myAgg]++; - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) - if (procWinner[lnode] == myPid) { - LO myAgg = vertex2AggId[lnode]; - GO nodeGID = nodeGlobalElts[lnode]; - - for (LO k = 0; k < stridedblocksize_; k++) { - GO GID = ComputeGlobalDOF(nodeGID, k); - if (columnMap_->isNodeGlobalElement(GID)) { - aggToRowMap[aggStart[myAgg] + 
numDofs[myAgg]] = lnode*stridedblocksize_ + k; - numDofs[myAgg]++; - } + if (procWinner[lnode] == myPid) { + GO gnodeid = nodeGlobalElts[lnode]; + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid, k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = gDofIndex; + ++(numDofs[myAgg]); } } + } } - // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - - } //UnamalgamateAggregatesLO - - template - void AmalgamationInfo::print(Teuchos::FancyOStream &out, - const VerbLevel verbLevel) const - { - if (!(verbLevel & Debug)) - return; - - out << "AmalgamationInfo -- Striding information:" - << "\n fullBlockSize = " << fullblocksize_ - << "\n blockID = " << blockid_ - << "\n stridingOffset = " << nStridedOffset_ - << "\n stridedBlockSize = " << stridedblocksize_ - << "\n indexBase = " << indexBase_ - << std::endl; - - out << "AmalgamationInfo -- DOFs to nodes mapping:\n" - << " Mapping of row DOFs to row nodes:" << *rowTranslation_() - << "\n\n Mapping of column DOFs to column nodes:" << *colTranslation_() - << std::endl; - - out << "AmalgamationInfo -- row node map:" << std::endl; - nodeRowMap_->describe(out, Teuchos::VERB_EXTREME); - - out << "AmalgamationInfo -- column node map:" << std::endl; - nodeColMap_->describe(out, Teuchos::VERB_EXTREME); } + // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - ///////////////////////////////////////////////////////////////////////////// +} // UnamalgamateAggregates - template - RCP > AmalgamationInfo:: - ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const { - return ComputeUnamalgamatedImportDofMap(aggregates.GetMap()); +template +void AmalgamationInfo:: + UnamalgamateAggregatesLO(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + UnamalgamateAggregatesLO(aggregates.GetMap(), + aggregates.GetProcWinner(), + 
aggregates.GetVertex2AggId(), + aggregates.GetNumAggregates(), + aggStart, + aggToRowMap); +} + +template +void AmalgamationInfo:: + UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + int myPid = nodeMap->getComm()->getRank(); + Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); + + Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); + + // FIXME: Do we need to compute size here? Or can we use existing? + const LO size = procWinner.size(); + + std::vector sizes(numAggregates); + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; lnode++) + if (procWinner[lnode] == myPid) + sizes[vertex2AggId[lnode]]++; + } else { + for (LO lnode = 0; lnode < size; lnode++) + if (procWinner[lnode] == myPid) { + GO nodeGID = nodeGlobalElts[lnode]; + + for (LO k = 0; k < stridedblocksize_; k++) { + GO GID = ComputeGlobalDOF(nodeGID, k); + if (columnMap_->isNodeGlobalElement(GID)) + sizes[vertex2AggId[lnode]]++; + } + } } - template - RCP > AmalgamationInfo:: - ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const { - - Teuchos::RCP > myDofGids = Teuchos::rcp(new std::vector); - Teuchos::ArrayView gEltList = nodeMap->getLocalElementList(); - LO nodeElements = Teuchos::as(nodeMap->getLocalNumElements()); - if (stridedblocksize_ == 1) { - for (LO n = 0; npush_back(gDofIndex); + aggStart = ArrayRCP(numAggregates + 1); // FIXME: useless initialization with zeros + aggStart[0] = 0; + for (GO i = 0; i < numAggregates; i++) + aggStart[i + 1] = aggStart[i] + sizes[i]; + + aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); + + // count, how many dofs have been recorded for each aggregate so far + Array numDofs(numAggregates, 0); // empty array with number of DOFs for each aggregate + if (stridedblocksize_ 
== 1) { + for (LO lnode = 0; lnode < size; ++lnode) + if (procWinner[lnode] == myPid) { + LO myAgg = vertex2AggId[lnode]; + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode; + numDofs[myAgg]++; } - } else { - for (LO n = 0; nisNodeGlobalElement(gDofIndex)) - myDofGids->push_back(gDofIndex); + } else { + for (LO lnode = 0; lnode < size; ++lnode) + if (procWinner[lnode] == myPid) { + LO myAgg = vertex2AggId[lnode]; + GO nodeGID = nodeGlobalElts[lnode]; + + for (LO k = 0; k < stridedblocksize_; k++) { + GO GID = ComputeGlobalDOF(nodeGID, k); + if (columnMap_->isNodeGlobalElement(GID)) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode * stridedblocksize_ + k; + numDofs[myAgg]++; + } } } - } - - Teuchos::ArrayRCP arr_myDofGids = Teuchos::arcp( myDofGids ); - Teuchos::RCP importDofMap = MapFactory::Build(nodeMap->lib(), Teuchos::OrdinalTraits::invalid(), arr_myDofGids(), nodeMap->getIndexBase(), nodeMap->getComm()); - return importDofMap; - } - - ///////////////////////////////////////////////////////////////////////////// - - template - GlobalOrdinal AmalgamationInfo:: - ComputeGlobalDOF(GlobalOrdinal const &gNodeID, LocalOrdinal const &k) const { - // here, the assumption is, that the node map has the same indexBase as the dof map - // this is the node map index base this is the dof map index base - GlobalOrdinal gDofIndex = offset_ + (gNodeID-indexBase_)*fullblocksize_ + nStridedOffset_ + k + indexBase_; - return gDofIndex; } - - template - LocalOrdinal AmalgamationInfo::ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const { - LocalOrdinal lDofIndex = lNodeID*fullblocksize_ + k; - return lDofIndex; - } - - - template - LocalOrdinal AmalgamationInfo::ComputeLocalNode(LocalOrdinal const &ldofID) const { - return (ldofID - ldofID%fullblocksize_) / fullblocksize_; + // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() + +} // UnamalgamateAggregatesLO + +template +void AmalgamationInfo::print(Teuchos::FancyOStream &out, + 
const VerbLevel verbLevel) const { + if (!(verbLevel & Debug)) + return; + + out << "AmalgamationInfo -- Striding information:" + << "\n fullBlockSize = " << fullblocksize_ + << "\n blockID = " << blockid_ + << "\n stridingOffset = " << nStridedOffset_ + << "\n stridedBlockSize = " << stridedblocksize_ + << "\n indexBase = " << indexBase_ + << std::endl; + + out << "AmalgamationInfo -- DOFs to nodes mapping:\n" + << " Mapping of row DOFs to row nodes:" << *rowTranslation_() + << "\n\n Mapping of column DOFs to column nodes:" << *colTranslation_() + << std::endl; + + out << "AmalgamationInfo -- row node map:" << std::endl; + nodeRowMap_->describe(out, Teuchos::VERB_EXTREME); + + out << "AmalgamationInfo -- column node map:" << std::endl; + nodeColMap_->describe(out, Teuchos::VERB_EXTREME); +} + +///////////////////////////////////////////////////////////////////////////// + +template +RCP > AmalgamationInfo:: + ComputeUnamalgamatedImportDofMap(const Aggregates &aggregates) const { + return ComputeUnamalgamatedImportDofMap(aggregates.GetMap()); +} + +template +RCP > AmalgamationInfo:: + ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const { + Teuchos::RCP > myDofGids = Teuchos::rcp(new std::vector); + Teuchos::ArrayView gEltList = nodeMap->getLocalElementList(); + LO nodeElements = Teuchos::as(nodeMap->getLocalNumElements()); + if (stridedblocksize_ == 1) { + for (LO n = 0; n < nodeElements; n++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gEltList[n]); + myDofGids->push_back(gDofIndex); + } + } else { + for (LO n = 0; n < nodeElements; n++) { + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gEltList[n], k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) + myDofGids->push_back(gDofIndex); + } + } } -} //namespace - + Teuchos::ArrayRCP arr_myDofGids = Teuchos::arcp(myDofGids); + Teuchos::RCP importDofMap = MapFactory::Build(nodeMap->lib(), Teuchos::OrdinalTraits::invalid(), arr_myDofGids(), 
nodeMap->getIndexBase(), nodeMap->getComm()); + return importDofMap; +} + +///////////////////////////////////////////////////////////////////////////// + +template +GlobalOrdinal AmalgamationInfo:: + ComputeGlobalDOF(GlobalOrdinal const &gNodeID, LocalOrdinal const &k) const { + // here, the assumption is, that the node map has the same indexBase as the dof map + // this is the node map index base this is the dof map index base + GlobalOrdinal gDofIndex = offset_ + (gNodeID - indexBase_) * fullblocksize_ + nStridedOffset_ + k + indexBase_; + return gDofIndex; +} + +template +LocalOrdinal AmalgamationInfo::ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const { + LocalOrdinal lDofIndex = lNodeID * fullblocksize_ + k; + return lDofIndex; +} + +template +LocalOrdinal AmalgamationInfo::ComputeLocalNode(LocalOrdinal const &ldofID) const { + return (ldofID - ldofID % fullblocksize_) / fullblocksize_; +} + +} // namespace MueLu #endif /* MUELU_AMALGAMATIONINFO_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp index 5e03e2fae3ef..b32aa784e053 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp @@ -52,7 +52,7 @@ #include #include #include -#include //TODO +#include //TODO #include #include @@ -72,116 +72,112 @@ namespace MueLu { - /*! - @class CoalesceDropFactory - @brief Factory for creating a graph based on a given matrix. +/*! + @class CoalesceDropFactory + @brief Factory for creating a graph based on a given matrix. - Factory for creating graphs from matrices with entries selectively dropped. + Factory for creating graphs from matrices with entries selectively dropped. 
- ## Code paths ## + ## Code paths ## - Both the classic dropping strategy as well as a coordinate-based distance laplacian method - is implemented. For performance reasons there are four distinctive code paths for the - classical method: + Both the classic dropping strategy as well as a coordinate-based distance laplacian method + is implemented. For performance reasons there are four distinctive code paths for the + classical method: - - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) - - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) - - number of DOFs per node > 1 withouth dropping - - number of DOFs per node > 1 with dropping + - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) + - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) + - number of DOFs per node > 1 without dropping + - number of DOFs per node > 1 with dropping - Additionally, there is a code path for the distance-laplacian mode. + Additionally, there is a code path for the distance-laplacian mode. - ## Input/output of CoalesceDropFactory ## + ## Input/output of CoalesceDropFactory ## - ### User parameters of CoalesceDropFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the operator A - UnAmalgamationInfo | Factory | null | | * | * | Generating factory of type AmalgamationFactory which generates the variable 'UnAmalgamationInfo'. Do not change the default unless you know what you are doing. - Coordinates | Factory | null | | * | (*) | Generating factory for variable 'Coordinates'. The coordinates are only needed if "distance laplacian" is chosen for the parameter "aggregation: drop scheme" - "aggregation: drop scheme" | std::string | "classical" | * | * | | Coalescing algorithm. 
You can choose either "classical" (=default) or "distance laplacian" - "aggregation: drop tol" | double | 0.0 | * | * | | Threshold parameter for dropping small entries - "aggregation: Dirichlet threshold" | double | 0.0 | * | * | | Threshold for determining whether entries are zero during Dirichlet row detection - "lightweight wrap" | bool | true | | * | | hidden switch between fast implementation based on MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph (for comparison). The user should not change the default value (=true) + ### User parameters of CoalesceDropFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the operator A + UnAmalgamationInfo | Factory | null | | * | * | Generating factory of type AmalgamationFactory which generates the variable 'UnAmalgamationInfo'. Do not change the default unless you know what you are doing. + Coordinates | Factory | null | | * | (*) | Generating factory for variable 'Coordinates'. The coordinates are only needed if "distance laplacian" is chosen for the parameter "aggregation: drop scheme" + "aggregation: drop scheme" | std::string | "classical" | * | * | | Coalescing algorithm. You can choose either "classical" (=default) or "distance laplacian" + "aggregation: drop tol" | double | 0.0 | * | * | | Threshold parameter for dropping small entries + "aggregation: Dirichlet threshold" | double | 0.0 | * | * | | Threshold for determining whether entries are zero during Dirichlet row detection + "lightweight wrap" | bool | true | | * | | hidden switch between fast implementation based on MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph (for comparison). The user should not change the default value (=true) - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoalesceDropFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see CoalesceDropFactory::DeclareInput). + The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
+ The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoalesceDropFactory::GetValidParameterList).
+ The * in the @c requested column states that the data is requested as input with all dependencies (see CoalesceDropFactory::DeclareInput). - ### Variables provided by UncoupledAggregationFactory ### + ### Variables provided by CoalesceDropFactory ### - After CoalesceDropFactory::Build the following data is available (if requested) + After CoalesceDropFactory::Build the following data is available (if requested) - Parameter | generated by | description - ----------|--------------|------------ - Graph | CoalesceDropFactory | Graph of matrix A - DofsPerNode | CoalesceDropFactory | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. + Parameter | generated by | description + ----------|--------------|------------ + Graph | CoalesceDropFactory | Graph of matrix A + DofsPerNode | CoalesceDropFactory | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. - ## Amalgamation process ## + ## Amalgamation process ## - The CoalesceDropFactory is internally using the AmalgamationFactory for amalgamating the dof-based maps to node-based maps. The AmalgamationFactory creates the "UnAmalgamationInfo" container - which basically stores all the necessary information for translating dof based data to node based data and vice versa. The container is used, since this way the amalgamation is only done once - and later reused by other factories. + The CoalesceDropFactory is internally using the AmalgamationFactory for amalgamating the dof-based maps to node-based maps. The AmalgamationFactory creates the "UnAmalgamationInfo" container + which basically stores all the necessary information for translating dof based data to node based data and vice versa. The container is used, since this way the amalgamation is only done once + and later reused by other factories. 
- Of course, often one does not need the information from the "UnAmalgamationInfo" container since the same information could be extracted of the "Graph" or the map from the "Coordinates" vector. - However, there are also some situations (e.g. when doing rebalancing based on HyperGraph partitioning without coordinate information) where one has not access to a "Graph" or "Coordinates" variable. - */ + Of course, often one does not need the information from the "UnAmalgamationInfo" container since the same information could be extracted of the "Graph" or the map from the "Coordinates" vector. + However, there are also some situations (e.g. when doing rebalancing based on HyperGraph partitioning without coordinate information) where one has not access to a "Graph" or "Coordinates" variable. +*/ - template - class CoalesceDropFactory : public SingleLevelFactoryBase { +template +class CoalesceDropFactory : public SingleLevelFactoryBase { #undef MUELU_COALESCEDROPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + CoalesceDropFactory(); - //! Constructor - CoalesceDropFactory(); + //! Destructor + virtual ~CoalesceDropFactory() {} - //! Destructor - virtual ~CoalesceDropFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level ¤tLevel) const; + /// set predrop function + void SetPreDropFunction(const RCP >& predrop) { predrop_ = predrop; } - /// set predrop function - void SetPreDropFunction(const RCP > &predrop) { predrop_ = predrop; } + //@} - //@} + void Build(Level& currentLevel) const; // Build - void Build(Level ¤tLevel) const; // Build + private: + // pre-drop function + mutable RCP predrop_; - private: + //! Method to merge rows of matrix for systems of PDEs. 
+ void MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const; + void MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const; - // pre-drop function - mutable - RCP predrop_; + // When we want to decouple a block diagonal system (returns Teuchos::null if generate_matrix is false) + Teuchos::RCP > BlockDiagonalize(Level& currentLevel, const RCP& A, bool generate_matrix) const; - //! Method to merge rows of matrix for systems of PDEs. - void MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const; - void MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const; + // When we want to decouple a block diagonal system via a *graph* + void BlockDiagonalizeGraph(const RCP& inputGraph, const RCP& ghostedBlockNumber, RCP& outputGraph, RCP& importer) const; +}; // class CoalesceDropFactory - // When we want to decouple a block diagonal system (returns Teuchos::null if generate_matrix is false) - Teuchos::RCP > BlockDiagonalize(Level & currentLevel,const RCP & A, bool generate_matrix) const; - - // When we want to decouple a block diagonal system via a *graph* - void BlockDiagonalizeGraph(const RCP & inputGraph, const RCP & ghostedBlockNumber, RCP & outputGraph, RCP & importer) const; - - }; //class CoalesceDropFactory - -} //namespace MueLu +} // namespace MueLu #define MUELU_COALESCEDROPFACTORY_SHORT -#endif // MUELU_COALESCEDROPFACTORY_DECL_HPP +#endif // MUELU_COALESCEDROPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index c4bf4178cbaf..34a2f3db77f4 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ 
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -87,1047 +87,1022 @@ // Should be removed once we are confident that this works. //#define DJS_READ_ENV_VARIABLES - namespace MueLu { - namespace Details { - template - struct DropTol { - - DropTol() = default; - DropTol(DropTol const&) = default; - DropTol(DropTol &&) = default; - - DropTol& operator=(DropTol const&) = default; - DropTol& operator=(DropTol &&) = default; - - DropTol(real_type val_, real_type diag_, LO col_, bool drop_) - : val{val_}, diag{diag_}, col{col_}, drop{drop_} - {} - - real_type val {Teuchos::ScalarTraits::zero()}; - real_type diag {Teuchos::ScalarTraits::zero()}; - LO col {Teuchos::OrdinalTraits::invalid()}; - bool drop {true}; - - // CMS: Auxillary information for debugging info - // real_type aux_val {Teuchos::ScalarTraits::nan()}; - }; - } - - - template - RCP CoalesceDropFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +namespace Details { +template +struct DropTol { + DropTol() = default; + DropTol(DropTol const&) = default; + DropTol(DropTol&&) = default; -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("aggregation: row sum drop tol"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); - SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal 
values) - validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("signed classical sa","classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme"))); - - } - SET_VALID_ENTRY("aggregation: distance laplacian algo"); - SET_VALID_ENTRY("aggregation: classical algo"); - SET_VALID_ENTRY("aggregation: coloring: localize color graph"); -#undef SET_VALID_ENTRY - validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access"); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - validParamList->set< RCP >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber"); - - return validParamList; - } - - template - CoalesceDropFactory::CoalesceDropFactory() : predrop_(Teuchos::null) { } - - template - void CoalesceDropFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); - - const ParameterList& pL = GetParameterList(); - if (pL.get("lightweight wrap") == true) { - std::string algo = pL.get("aggregation: drop scheme"); - if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { - Input(currentLevel, "Coordinates"); - } - if(algo == "signed classical sa") - ; - else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { - Input(currentLevel, "BlockNumber"); - } - } - - } - - template - void CoalesceDropFactory::Build(Level ¤tLevel) const { - - FactoryMonitor m(*this, "Build", currentLevel); - - typedef 
Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType real_type; - typedef Xpetra::MultiVector RealValuedMultiVector; - typedef Xpetra::MultiVectorFactory RealValuedMultiVectorFactory; - - if (predrop_ != Teuchos::null) - GetOStream(Parameters0) << predrop_->description(); - - RCP realA = Get< RCP >(currentLevel, "A"); - RCP amalInfo = Get< RCP >(currentLevel, "UnAmalgamationInfo"); - const ParameterList & pL = GetParameterList(); - bool doExperimentalWrap = pL.get("lightweight wrap"); - - GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl; - std::string algo = pL.get("aggregation: drop scheme"); - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); - - RCP Coords; - RCP A; + DropTol& operator=(DropTol const&) = default; + DropTol& operator=(DropTol&&) = default; - bool use_block_algorithm=false; - LO interleaved_blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); - bool useSignedClassicalRS = false; - bool useSignedClassicalSA = false; - bool generateColoringGraph = false; + DropTol(real_type val_, real_type diag_, LO col_, bool drop_) + : val{val_} + , diag{diag_} + , col{col_} + , drop{drop_} {} - // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it - // in the block diagonalization). 
So we'll clobber the rowSumTol with -1.0 in this case - typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + real_type val{Teuchos::ScalarTraits::zero()}; + real_type diag{Teuchos::ScalarTraits::zero()}; + LO col{Teuchos::OrdinalTraits::invalid()}; + bool drop{true}; + // CMS: Auxillary information for debugging info + // real_type aux_val {Teuchos::ScalarTraits::nan()}; +}; +} // namespace Details - RCP ghostedBlockNumber; - ArrayRCP g_block_id; +template +RCP CoalesceDropFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - if(algo == "distance laplacian" ) { - // Grab the coordinates for distance laplacian - Coords = Get< RCP >(currentLevel, "Coordinates"); - A = realA; - } - else if(algo == "signed classical sa") { - useSignedClassicalSA = true; - algo = "classical"; - A = realA; +#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("aggregation: row sum drop tol"); + SET_VALID_ENTRY("aggregation: drop scheme"); + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); + SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values) + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance 
laplacian", "block diagonal signed classical", "block diagonal colored signed classical"), "aggregation: drop scheme"))); + } + SET_VALID_ENTRY("aggregation: distance laplacian algo"); + SET_VALID_ENTRY("aggregation: classical algo"); + SET_VALID_ENTRY("aggregation: coloring: localize color graph"); +#undef SET_VALID_ENTRY + validParamList->set("lightweight wrap", true, "Experimental option for lightweight graph access"); + + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); + validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber"); + + return validParamList; +} + +template +CoalesceDropFactory::CoalesceDropFactory() + : predrop_(Teuchos::null) {} + +template +void CoalesceDropFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); + + const ParameterList& pL = GetParameterList(); + if (pL.get("lightweight wrap") == true) { + std::string algo = pL.get("aggregation: drop scheme"); + if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { + Input(currentLevel, "Coordinates"); } - else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { - useSignedClassicalRS = true; - // if(realA->GetFixedBlockSize() > 1) { - RCP BlockNumber = Get >(currentLevel, "BlockNumber"); - // Ghost the column block numbers if we need to - RCP importer = realA->getCrsGraph()->getImporter(); - if(!importer.is_null()) { - SubFactoryMonitor m1(*this, "Block Number import", currentLevel); - ghostedBlockNumber= Xpetra::VectorFactory::Build(importer->getTargetMap()); - ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); - } - else { - ghostedBlockNumber 
= BlockNumber; - } - g_block_id = ghostedBlockNumber->getData(0); - // } - if(algo == "block diagonal colored signed classical") - generateColoringGraph=true; - algo = "classical"; - A = realA; - + if (algo == "signed classical sa") + ; + else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { + Input(currentLevel, "BlockNumber"); } - else if(algo == "block diagonal") { - // Handle the "block diagonal" filtering and then leave - BlockDiagonalize(currentLevel,realA,false); - return; + } +} + +template +void CoalesceDropFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Xpetra::MultiVectorFactory RealValuedMultiVectorFactory; + + if (predrop_ != Teuchos::null) + GetOStream(Parameters0) << predrop_->description(); + + RCP realA = Get>(currentLevel, "A"); + RCP amalInfo = Get>(currentLevel, "UnAmalgamationInfo"); + const ParameterList& pL = GetParameterList(); + bool doExperimentalWrap = pL.get("lightweight wrap"); + + GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl; + std::string algo = pL.get("aggregation: drop scheme"); + const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + + RCP Coords; + RCP A; + + bool use_block_algorithm = false; + LO interleaved_blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); + bool useSignedClassicalRS = false; + bool useSignedClassicalSA = false; + bool generateColoringGraph = false; + + // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it + // in the block diagonalization). 
So we'll clobber the rowSumTol with -1.0 in this case + typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + + RCP ghostedBlockNumber; + ArrayRCP g_block_id; + + if (algo == "distance laplacian") { + // Grab the coordinates for distance laplacian + Coords = Get>(currentLevel, "Coordinates"); + A = realA; + } else if (algo == "signed classical sa") { + useSignedClassicalSA = true; + algo = "classical"; + A = realA; + } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { + useSignedClassicalRS = true; + // if(realA->GetFixedBlockSize() > 1) { + RCP BlockNumber = Get>(currentLevel, "BlockNumber"); + // Ghost the column block numbers if we need to + RCP importer = realA->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; } - else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") { - // Handle the "block diagonal" filtering, and then continue onward - use_block_algorithm = true; - RCP filteredMatrix = BlockDiagonalize(currentLevel,realA,true); - if(algo == "block diagonal distance laplacian") { - // We now need to expand the coordinates by the interleaved blocksize - RCP OldCoords = Get< RCP >(currentLevel, "Coordinates"); - if (OldCoords->getLocalLength() != realA->getLocalNumRows()) { - LO dim = (LO) OldCoords->getNumVectors(); - Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim); - for(LO k=0; k old_vec = OldCoords->getData(k); - ArrayRCP new_vec = Coords->getDataNonConst(k); - for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) { - LO new_base = i*dim; - for(LO j=0; jgetData(0); + // } + if (algo == "block diagonal colored 
signed classical") + generateColoringGraph = true; + algo = "classical"; + A = realA; + + } else if (algo == "block diagonal") { + // Handle the "block diagonal" filtering and then leave + BlockDiagonalize(currentLevel, realA, false); + return; + } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") { + // Handle the "block diagonal" filtering, and then continue onward + use_block_algorithm = true; + RCP filteredMatrix = BlockDiagonalize(currentLevel, realA, true); + if (algo == "block diagonal distance laplacian") { + // We now need to expand the coordinates by the interleaved blocksize + RCP OldCoords = Get>(currentLevel, "Coordinates"); + if (OldCoords->getLocalLength() != realA->getLocalNumRows()) { + LO dim = (LO)OldCoords->getNumVectors(); + Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim); + for (LO k = 0; k < dim; k++) { + ArrayRCP old_vec = OldCoords->getData(k); + ArrayRCP new_vec = Coords->getDataNonConst(k); + for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) { + LO new_base = i * dim; + for (LO j = 0; j < interleaved_blocksize; j++) + new_vec[new_base + j] = old_vec[i]; } } - else { - Coords = OldCoords; - } - algo = "distance laplacian"; + } else { + Coords = OldCoords; } - else if(algo == "block diagonal classical") { - algo = "classical"; - } - // All cases - A = filteredMatrix; - rowSumTol = -1.0; - } - else { - A = realA; + algo = "distance laplacian"; + } else if (algo == "block diagonal classical") { + algo = "classical"; } + // All cases + A = filteredMatrix; + rowSumTol = -1.0; + } else { + A = realA; + } - // Distance Laplacian weights - Array dlap_weights = pL.get >("aggregation: distance laplacian directional weights"); - enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS}; - int use_dlap_weights = NO_WEIGHTS; - if(algo == "distance laplacian") { - LO dim = (LO) Coords->getNumVectors(); - // If anything isn't 1.0 we need to turn on the weighting - bool non_unity = false; - 
for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) { - if(dlap_weights[i] != 1.0) { - non_unity = true; - } + // Distance Laplacian weights + Array dlap_weights = pL.get>("aggregation: distance laplacian directional weights"); + enum { NO_WEIGHTS = 0, + SINGLE_WEIGHTS, + BLOCK_WEIGHTS }; + int use_dlap_weights = NO_WEIGHTS; + if (algo == "distance laplacian") { + LO dim = (LO)Coords->getNumVectors(); + // If anything isn't 1.0 we need to turn on the weighting + bool non_unity = false; + for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) { + if (dlap_weights[i] != 1.0) { + non_unity = true; } - if(non_unity) { - LO blocksize = use_block_algorithm ? as(pL.get("aggregation: block diagonal: interleaved blocksize")) : 1; - if((LO)dlap_weights.size() == dim) - use_dlap_weights = SINGLE_WEIGHTS; - else if((LO)dlap_weights.size() == blocksize * dim) - use_dlap_weights = BLOCK_WEIGHTS; - else { - TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, - "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize"); - } - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Using distance laplacian weights: "<(pL.get("aggregation: block diagonal: interleaved blocksize")) : 1; + if ((LO)dlap_weights.size() == dim) + use_dlap_weights = SINGLE_WEIGHTS; + else if ((LO)dlap_weights.size() == blocksize * dim) + use_dlap_weights = BLOCK_WEIGHTS; + else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, + "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize"); } + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl; } + } - // decide wether to use the fast-track code path for standard maps or the somewhat slower - // code path for non-standard maps - /*bool bNonStandardMaps = false; - 
if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0) - bNonStandardMaps = true; - }*/ - - if (doExperimentalWrap) { - TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm"); - TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)"); - - SC threshold; - // If we're doing the ML-style halving of the drop tol at each level, we do that here. - if (pL.get("aggregation: use ml scaling of drop tol")) - threshold = pL.get("aggregation: drop tol") / pow(2.0,currentLevel.GetLevelID()); - else - threshold = as(pL.get("aggregation: drop tol")); - - - std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); - std::string classicalAlgoStr = pL.get("aggregation: classical algo"); - real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime - - //////////////////////////////////////////////////// - // Remove this bit once we are confident that cut-based dropping works. 
+ // decide wether to use the fast-track code path for standard maps or the somewhat slower + // code path for non-standard maps + /*bool bNonStandardMaps = false; + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0) + bNonStandardMaps = true; + }*/ + + if (doExperimentalWrap) { + TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm"); + TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)"); + + SC threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do that here. + if (pL.get("aggregation: use ml scaling of drop tol")) + threshold = pL.get("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID()); + else + threshold = as(pL.get("aggregation: drop tol")); + + std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); + std::string classicalAlgoStr = pL.get("aggregation: classical algo"); + real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime + + //////////////////////////////////////////////////// + // Remove this bit once we are confident that cut-based dropping works. 
#ifdef HAVE_MUELU_DEBUG - int distanceLaplacianCutVerbose = 0; + int distanceLaplacianCutVerbose = 0; #endif #ifdef DJS_READ_ENV_VARIABLES - if (getenv("MUELU_DROP_TOLERANCE_MODE")) { - distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE")); - } + if (getenv("MUELU_DROP_TOLERANCE_MODE")) { + distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE")); + } - if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) { - auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD")); - realThreshold = 1e-4*tmp; - } + if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) { + auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD")); + realThreshold = 1e-4 * tmp; + } -# ifdef HAVE_MUELU_DEBUG - if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) { - distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE")); - } -# endif +#ifdef HAVE_MUELU_DEBUG + if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) { + distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE")); + } #endif - //////////////////////////////////////////////////// - - enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric}; - - decisionAlgoType distanceLaplacianAlgo = defaultAlgo; - decisionAlgoType classicalAlgo = defaultAlgo; - if (algo == "distance laplacian") { - if (distanceLaplacianAlgoStr == "default") - distanceLaplacianAlgo = defaultAlgo; - else if (distanceLaplacianAlgoStr == "unscaled cut") - distanceLaplacianAlgo = unscaled_cut; - else if (distanceLaplacianAlgoStr == "scaled cut") - distanceLaplacianAlgo = scaled_cut; - else if (distanceLaplacianAlgoStr == "scaled cut symmetric") - distanceLaplacianAlgo = scaled_cut_symmetric; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\""); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << 
distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize()<< std::endl; - } else if (algo == "classical") { - if (classicalAlgoStr == "default") - classicalAlgo = defaultAlgo; - else if (classicalAlgoStr == "unscaled cut") - classicalAlgo = unscaled_cut; - else if (classicalAlgoStr == "scaled cut") - classicalAlgo = scaled_cut; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\""); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - - } else - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - Set(currentLevel, "Filtering", (threshold != STS::zero())); +#endif + //////////////////////////////////////////////////// + + enum decisionAlgoType { defaultAlgo, + unscaled_cut, + scaled_cut, + scaled_cut_symmetric }; + + decisionAlgoType distanceLaplacianAlgo = defaultAlgo; + decisionAlgoType classicalAlgo = defaultAlgo; + if (algo == "distance laplacian") { + if (distanceLaplacianAlgoStr == "default") + distanceLaplacianAlgo = defaultAlgo; + else if (distanceLaplacianAlgoStr == "unscaled cut") + distanceLaplacianAlgo = unscaled_cut; + else if (distanceLaplacianAlgoStr == "scaled cut") + distanceLaplacianAlgo = scaled_cut; + else if (distanceLaplacianAlgoStr == "scaled cut symmetric") + distanceLaplacianAlgo = scaled_cut_symmetric; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\""); + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << 
"\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + } else if (algo == "classical") { + if (classicalAlgoStr == "default") + classicalAlgo = defaultAlgo; + else if (classicalAlgoStr == "unscaled cut") + classicalAlgo = unscaled_cut; + else if (classicalAlgoStr == "scaled cut") + classicalAlgo = scaled_cut; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\""); + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + } else + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + Set(currentLevel, "Filtering", (threshold != STS::zero())); - const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + // NOTE: We don't support signed classical RS or SA with cut drop at present + TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation"); + TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation"); - // NOTE: We don't support signed classical RS or SA with cut drop at present - TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation"); - 
TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation"); + GO numDropped = 0, numTotal = 0; + std::string graphType = "unamalgamated"; // for description purposes only - GO numDropped = 0, numTotal = 0; - std::string graphType = "unamalgamated"; //for description purposes only + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. + BlockSize is the number of storage blocks that must kept together during the amalgamation process. - - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. - BlockSize is the number of storage blocks that must kept together during the amalgamation process. + Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: - Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: + numPDEs = BlockSize * storageblocksize. - numPDEs = BlockSize * storageblocksize. - - If numPDEs==1 - Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1 - No other values makes sense. + If numPDEs==1 + Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1 + No other values makes sense. - If numPDEs>1 - If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs. - If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n. - Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested. 
- */ - TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); - const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + If numPDEs>1 + If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs. + If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n. + Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested. + */ + TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); + const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + /************************** RS or SA-style Classical Dropping (and variants) **************************/ + if (algo == "classical") { + if (predrop_ == null) { + // ap: this is a hack: had to declare predrop_ as mutable + predrop_ = rcp(new PreDropFunctionConstVal(threshold)); + } - /************************** RS or SA-style Classical Dropping (and variants) **************************/ - if (algo == "classical") { - if (predrop_ == null) { - // ap: this is a hack: had to declare predrop_ as mutable - predrop_ = rcp(new PreDropFunctionConstVal(threshold)); + if (predrop_ != null) { + RCP predropConstVal = rcp_dynamic_cast(predrop_); + TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast, + "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed."); + // If a user provided a predrop function, it overwrites the XML threshold parameter + SC newt = predropConstVal->GetThreshold(); + if (newt != threshold) { + GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl; + threshold = newt; + } + } + // At this points we either have + // (predrop_ != null) + // 
Therefore, it is sufficient to check only threshold + if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) { + // Case 1: scalar problem, no dropping => just use matrix graph + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + // Detect and record rows that correspond to Dirichlet boundary conditions + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); + + graph->SetBoundaryNodeMap(boundaryNodes); + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - if (predrop_ != null) { - RCP predropConstVal = rcp_dynamic_cast(predrop_); - TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast, - "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed."); - // If a user provided a predrop function, it overwrites the XML threshold parameter - SC newt = predropConstVal->GetThreshold(); - if (newt != threshold) { - GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl; - threshold = newt; + Set(currentLevel, "DofsPerNode", 1); + Set(currentLevel, "Graph", graph); + + } else if ((BlockSize == 1 && threshold != STS::zero()) || + (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) || + (BlockSize == 1 && useSignedClassicalRS) || + (BlockSize == 1 && useSignedClassicalSA)) { + // Case 2: scalar problem with dropping => record the 
column indices of undropped entries, but still use original + // graph's map information, e.g., whether index is local + // OR a matrix without a CrsGraph + + // allocate space for the local graph + ArrayRCP rows(A->getLocalNumRows() + 1); + ArrayRCP columns(A->getLocalNumEntries()); + + using MT = typename STS::magnitudeType; + RCP ghostedDiag; + ArrayRCP ghostedDiagVals; + ArrayRCP negMaxOffDiagonal; + // RS style needs the max negative off-diagonal, SA style needs the diagonal + if (useSignedClassicalRS) { + if (ghostedBlockNumber.is_null()) { + negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl; + } else { + negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber); + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl; } + } else { + ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + ghostedDiagVals = ghostedDiag->getData(0); } - // At this points we either have - // (predrop_ != null) - // Therefore, it is sufficient to check only threshold - if ( BlockSize==1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) { - // Case 1: scalar problem, no dropping => just use matrix graph - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - // Detect and record rows that correspond to Dirichlet boundary conditions - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) { + if (ghostedBlockNumber.is_null()) { + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Applying point row sum criterion." 
<< std::endl; Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - - graph->SetBoundaryNodeMap(boundaryNodes); - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - - } else if ( (BlockSize == 1 && threshold != STS::zero()) || - (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) || - (BlockSize == 1 && useSignedClassicalRS) || - (BlockSize == 1 && useSignedClassicalSA) ) { - // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original - // graph's map information, e.g., whether index is local - // OR a matrix without a CrsGraph - - // allocate space for the local graph - ArrayRCP rows (A->getLocalNumRows()+1); - ArrayRCP columns(A->getLocalNumEntries()); - - using MT = typename STS::magnitudeType; - RCP ghostedDiag; - ArrayRCP ghostedDiagVals; - ArrayRCP negMaxOffDiagonal; - // RS style needs the max negative off-diagonal, SA style needs the diagonal - if(useSignedClassicalRS) { - if(ghostedBlockNumber.is_null()) { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl; - } - else { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber); - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl; - } - } - else { - ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - 
ghostedDiagVals = ghostedDiag->getData(0); - } - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) { - if(ghostedBlockNumber.is_null()) { - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl; - Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - } else { - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl; - Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes); - } + } else { + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl; + Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes); } + } - LO realnnz = 0; - rows[0] = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - size_t nnz = A->getNumEntriesInLocalRow(row); - bool rowIsDirichlet = boundaryNodes[row]; - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); - - if(classicalAlgo == defaultAlgo) { - //FIXME the current predrop function uses the following - //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid ) - //FIXME but the threshold doesn't take into account the rows' diagonal entries - //FIXME For now, hardwiring the dropping in here - - LO rownnz = 0; - if(useSignedClassicalRS) { - // Signed classical RS style - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; - MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]); - MT neg_aij = - STS::real(vals[colID]); - /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID], - g_block_id.is_null() ? -1 : g_block_id[row], - g_block_id.is_null() ? 
-1 : g_block_id[col], - neg_aij, max_neg_aik);*/ - if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + LO realnnz = 0; + rows[0] = 0; + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + size_t nnz = A->getNumEntriesInLocalRow(row); + bool rowIsDirichlet = boundaryNodes[row]; + ArrayView indices; + ArrayView vals; + A->getLocalRowView(row, indices, vals); + + if (classicalAlgo == defaultAlgo) { + // FIXME the current predrop function uses the following + // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid ) + // FIXME but the threshold doesn't take into account the rows' diagonal entries + // FIXME For now, hardwiring the dropping in here + + LO rownnz = 0; + if (useSignedClassicalRS) { + // Signed classical RS style + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]); + MT neg_aij = -STS::real(vals[colID]); + /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID], + g_block_id.is_null() ? -1 : g_block_id[row], + g_block_id.is_null() ? 
-1 : g_block_id[col], + neg_aij, max_neg_aik);*/ + if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - else if(useSignedClassicalSA) { - // Signed classical SA style - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; + rows[row + 1] = realnnz; + } else if (useSignedClassicalSA) { + // Signed classical SA style + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; - bool is_nonpositive = STS::real(vals[colID]) <= 0; - MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - MT aij = is_nonpositive ? STS::magnitude(vals[colID]*vals[colID]) : (-STS::magnitude(vals[colID]*vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 - /* - if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID], - vals[colID],aij, aiiajj); - */ - - if ((!rowIsDirichlet && aij > aiiajj) || row == col) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + bool is_nonpositive = STS::real(vals[colID]) <= 0; + MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + MT aij = is_nonpositive ? 
STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 + /* + if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID], + vals[colID],aij, aiiajj); + */ + + if ((!rowIsDirichlet && aij > aiiajj) || row == col) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - else { - // Standard abs classical - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; - MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2 - - if ((!rowIsDirichlet && aij > aiiajj) || row == col) { + rows[row + 1] = realnnz; + } else { + // Standard abs classical + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 + + if ((!rowIsDirichlet && aij > aiiajj) || row == col) { columns[realnnz++] = col; rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + } else + numDropped++; } + rows[row + 1] = realnnz; } - else { - /* Cut Algorithm */ - //CMS - using DropTol = Details::DropTol; - std::vector drop_vec; - drop_vec.reserve(nnz); - const real_type zero = Teuchos::ScalarTraits::zero(); - const real_type one = Teuchos::ScalarTraits::one(); - LO rownnz = 0; - // NOTE: This probably needs to be fixed for rowsum - - // find magnitudes - for (LO colID = 0; colID < (LO)nnz; colID++) { - LO col = indices[colID]; - if (row == col) { - drop_vec.emplace_back( zero, one, colID, false); - continue; - } - - // Don't aggregate boundaries - if(boundaryNodes[colID]) continue; - typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * 
ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2 - drop_vec.emplace_back(aij, aiiajj, colID, false); + } else { + /* Cut Algorithm */ + // CMS + using DropTol = Details::DropTol; + std::vector drop_vec; + drop_vec.reserve(nnz); + const real_type zero = Teuchos::ScalarTraits::zero(); + const real_type one = Teuchos::ScalarTraits::one(); + LO rownnz = 0; + // NOTE: This probably needs to be fixed for rowsum + + // find magnitudes + for (LO colID = 0; colID < (LO)nnz; colID++) { + LO col = indices[colID]; + if (row == col) { + drop_vec.emplace_back(zero, one, colID, false); + continue; } - const size_t n = drop_vec.size(); - - if (classicalAlgo == unscaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - }); - - bool drop = false; - for (size_t i=1; i realThreshold*b) { - drop = true; + // Don't aggregate boundaries + if (boundaryNodes[colID]) continue; + typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + typename STS::magnitudeType aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 + drop_vec.emplace_back(aij, aiiajj, colID, false); + } + + const size_t n = drop_vec.size(); + + if (classicalAlgo == unscaled_cut) { + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val > b.val; + }); + + bool drop = false; + for (size_t i = 1; i < n; ++i) { + if (!drop) { + auto const& x = drop_vec[i - 1]; + auto const& y = drop_vec[i]; + auto a = x.val; + auto b = y.val; + if (a > realThreshold * b) { + drop = true; #ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } -#endif + if (distanceLaplacianCutVerbose) { + std::cout << "DJS: KEEP, N, ROW: " << i + 1 
<< ", " << n << ", " << row << std::endl; } +#endif } - drop_vec[i].drop = drop; } - } else if (classicalAlgo == scaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val/a.diag > b.val/b.diag; - }); - bool drop = false; - // printf("[%d] Scaled Cut: ",(int)row); - // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep"); - for (size_t i=1; i realThreshold*b) { - drop = true; + drop_vec[i].drop = drop; + } + } else if (classicalAlgo == scaled_cut) { + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val / a.diag > b.val / b.diag; + }); + bool drop = false; + // printf("[%d] Scaled Cut: ",(int)row); + // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep"); + for (size_t i = 1; i < n; ++i) { + if (!drop) { + auto const& x = drop_vec[i - 1]; + auto const& y = drop_vec[i]; + auto a = x.val / x.diag; + auto b = y.val / y.diag; + if (a > realThreshold * b) { + drop = true; #ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } -#endif - } - // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep"); - + if (distanceLaplacianCutVerbose) { + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; } - drop_vec[i].drop = drop; +#endif } - // printf("\n"); - } - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - } - ); - - for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) { - LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal - if (row == col) { - columns[realnnz++] = col; - rownnz++; - continue; - } - - if (!drop_vec[idxID].drop) { - columns[realnnz++] = col; - rownnz++; - } else { - numDropped++; + // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep"); } + drop_vec[i].drop = drop; } - // CMS - rows[row+1] = realnnz; - + // printf("\n"); } - 
}//end for row - - columns.resize(realnnz); - numTotal = A->getLocalNumEntries(); + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.col < b.col; + }); + + for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { + LO col = indices[drop_vec[idxID].col]; + // don't drop diagonal + if (row == col) { + columns[realnnz++] = col; + rownnz++; + continue; + } - if (aggregationMayCreateDirichlet) { - // If the only element remaining after filtering is diagonal, mark node as boundary - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - if (rows[row+1]- rows[row] <= 1) - boundaryNodes[row] = true; + if (!drop_vec[idxID].drop) { + columns[realnnz++] = col; + rownnz++; + } else { + numDropped++; + } } + // CMS + rows[row + 1] = realnnz; } + } // end for row - RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + columns.resize(realnnz); + numTotal = A->getLocalNumEntries(); + + if (aggregationMayCreateDirichlet) { + // If the only element remaining after filtering is diagonal, mark node as boundary + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + if (rows[row + 1] - rows[row] <= 1) + boundaryNodes[row] = true; } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", 1); - - // If we're doing signed classical, we might want to block-diagonalize *after* the dropping - if(generateColoringGraph) { - RCP colorGraph; - RCP importer = 
A->getCrsGraph()->getImporter(); - BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer); - Set(currentLevel, "Coloring Graph",colorGraph); - // #define CMS_DUMP + } + + RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", 1); + + // If we're doing signed classical, we might want to block-diagonalize *after* the dropping + if (generateColoringGraph) { + RCP colorGraph; + RCP importer = A->getCrsGraph()->getImporter(); + BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer); + Set(currentLevel, "Coloring Graph", colorGraph); + // #define CMS_DUMP #ifdef CMS_DUMP - { - Xpetra::IO::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(graph)->GetCrsGraph()); - Xpetra::IO::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(colorGraph)->GetCrsGraph()); - // int rank = graph->GetDomainMap()->getComm()->getRank(); - // { - // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); - // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); - // colorGraph->print(*fancy,Debug); - // } - // { - // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); - // RCP 
fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); - // graph->print(*fancy,Debug); - // } - - } + { + Xpetra::IO::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(graph)->GetCrsGraph()); + Xpetra::IO::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(colorGraph)->GetCrsGraph()); + // int rank = graph->GetDomainMap()->getComm()->getRank(); + // { + // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); + // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); + // colorGraph->print(*fancy,Debug); + // } + // { + // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); + // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); + // graph->print(*fancy,Debug); + // } + } #endif - }//end generateColoringGraph - } else if (BlockSize > 1 && threshold == STS::zero()) { - // Case 3: Multiple DOF/node problem without dropping - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); + } // end generateColoringGraph + } else if (BlockSize > 1 && threshold == STS::zero()) { + // Case 3: Multiple DOF/node problem without dropping + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); - graphType = "amalgamated"; + graphType = "amalgamated"; - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
The data is calculated by the AmalgamationFactory and - // stored in the variable container "UnAmalgamationInfo" - RCP uniqueMap = amalInfo->getNodeRowMap(); - RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslation = *(amalInfo->getRowTranslation()); - Array colTranslation = *(amalInfo->getColTranslation()); - - // get number of local nodes - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - // Allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - - const ArrayRCP amalgBoundaryNodes(numRows, false); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - - // extract striding information - LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) - LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. The data is calculated by the AmalgamationFactory and + // stored in the variable container "UnAmalgamationInfo" + RCP uniqueMap = amalInfo->getNodeRowMap(); + RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslation = *(amalInfo->getRowTranslation()); + Array colTranslation = *(amalInfo->getColTranslation()); - // loop over all local nodes - LO realnnz = 0; - rows[0] = 0; - Array indicesExtra; - for (LO row = 0; row < numRows; row++) { - ArrayView indices; - indicesExtra.resize(0); + // get number of local nodes + LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet - // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). - // Therefore, looping over all dofs is fine here. We use blkPartSize as we work - // with local ids. - // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) - // node. 
- bool isBoundary = false; - if (pL.get("aggregation: greedy Dirichlet") == true) { - for (LO j = 0; j < blkPartSize; j++) { - if (pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = true; - break; - } + // Allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + + const ArrayRCP amalgBoundaryNodes(numRows, false); + + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size + // TODO the array one bigger than the number of local rows, and the last entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + + // extract striding information + LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) + LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } + + // loop over all local nodes + LO realnnz = 0; + rows[0] = 0; + Array indicesExtra; + for (LO row = 0; row < numRows; row++) { + ArrayView indices; + indicesExtra.resize(0); + + // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet + // Note, that 
pointBoundaryNodes lives on the dofmap (and not the node map). + // Therefore, looping over all dofs is fine here. We use blkPartSize as we work + // with local ids. + // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) + // node. + bool isBoundary = false; + if (pL.get("aggregation: greedy Dirichlet") == true) { + for (LO j = 0; j < blkPartSize; j++) { + if (pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = true; + break; } - } else { - isBoundary = true; - for (LO j = 0; j < blkPartSize; j++) { - if (!pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = false; - break; - } + } + } else { + isBoundary = true; + for (LO j = 0; j < blkPartSize; j++) { + if (!pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = false; + break; } } + } - // Merge rows of A - // The array indicesExtra contains local column node ids for the current local node "row" - if (!isBoundary) - MergeRows(*A, row, indicesExtra, colTranslation); - else - indicesExtra.push_back(row); - indices = indicesExtra; - numTotal += indices.size(); + // Merge rows of A + // The array indicesExtra contains local column node ids for the current local node "row" + if (!isBoundary) + MergeRows(*A, row, indicesExtra, colTranslation); + else + indicesExtra.push_back(row); + indices = indicesExtra; + numTotal += indices.size(); + + // add the local column node ids to the full columns array which + // contains the local column node ids for all local node rows + LO nnz = indices.size(), rownnz = 0; + for (LO colID = 0; colID < nnz; colID++) { + LO col = indices[colID]; + columns[realnnz++] = col; + rownnz++; + } - // add the local column node ids to the full columns array which - // contains the local column node ids for all local node rows - LO nnz = indices.size(), rownnz = 0; - for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; - columns[realnnz++] = col; - rownnz++; - } + if (rownnz == 1) { + // If the only element remaining after 
filtering is diagonal, mark node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for distinguishing isolated + // and boundary nodes in the aggregation algorithms + amalgBoundaryNodes[row] = true; + } + rows[row + 1] = realnnz; + } // for (LO row = 0; row < numRows; row++) + columns.resize(realnnz); - if (rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - amalgBoundaryNodes[row] = true; - } - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - columns.resize(realnnz); + RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + graph->SetBoundaryNodeMap(amalgBoundaryNodes); - RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" << std::endl; + } - RCP > comm = A->getRowMap()->getComm(); - 
MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes - << " agglomerated Dirichlet nodes" << std::endl; - } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); // full block size - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); // full block size + } else if (BlockSize > 1 && threshold != STS::zero()) { + // Case 4: Multiple DOF/node problem with dropping + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + graphType = "amalgamated"; - } else if (BlockSize > 1 && threshold != STS::zero()) { - // Case 4: Multiple DOF/node problem with dropping - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - graphType = "amalgamated"; + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. The data is calculated by the AmalgamationFactory and + // stored in the variable container "UnAmalgamationInfo" + RCP uniqueMap = amalInfo->getNodeRowMap(); + RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslation = *(amalInfo->getRowTranslation()); + Array colTranslation = *(amalInfo->getColTranslation()); - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
The data is calculated by the AmalgamationFactory and - // stored in the variable container "UnAmalgamationInfo" - RCP uniqueMap = amalInfo->getNodeRowMap(); - RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslation = *(amalInfo->getRowTranslation()); - Array colTranslation = *(amalInfo->getColTranslation()); - - // get number of local nodes - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - // Allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - - const ArrayRCP amalgBoundaryNodes(numRows, false); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - - // extract striding information - LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) - LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } + // get number of local nodes + LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - // extract diagonal data for dropping strategy - RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - const ArrayRCP ghostedDiagVals = ghostedDiag->getData(0); + // Allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - // loop over all local nodes - LO realnnz = 0; - rows[0] = 0; - Array indicesExtra; - for (LO row = 0; row < numRows; row++) { - ArrayView indices; - indicesExtra.resize(0); + const ArrayRCP amalgBoundaryNodes(numRows, false); - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet - // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). - // Therefore, looping over all dofs is fine here. We use blkPartSize as we work - // with local ids. - // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) - // node. 
- bool isBoundary = false; - if (pL.get("aggregation: greedy Dirichlet") == true) { - for (LO j = 0; j < blkPartSize; j++) { - if (pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = true; - break; - } - } - } else { - isBoundary = true; - for (LO j = 0; j < blkPartSize; j++) { - if (!pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = false; - break; - } - } - } + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size + // TODO the array one bigger than the number of local rows, and the last entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - // Merge rows of A - // The array indicesExtra contains local column node ids for the current local node "row" - if (!isBoundary) - MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation); - else - indicesExtra.push_back(row); - indices = indicesExtra; - numTotal += indices.size(); + // extract striding information + LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) + LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } - 
// add the local column node ids to the full columns array which - // contains the local column node ids for all local node rows - LO nnz = indices.size(), rownnz = 0; - for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; - columns[realnnz++] = col; - rownnz++; - } + // extract diagonal data for dropping strategy + RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + const ArrayRCP ghostedDiagVals = ghostedDiag->getData(0); - if (rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - amalgBoundaryNodes[row] = true; + // loop over all local nodes + LO realnnz = 0; + rows[0] = 0; + Array indicesExtra; + for (LO row = 0; row < numRows; row++) { + ArrayView indices; + indicesExtra.resize(0); + + // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet + // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). + // Therefore, looping over all dofs is fine here. We use blkPartSize as we work + // with local ids. + // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) + // node. 
+ bool isBoundary = false; + if (pL.get("aggregation: greedy Dirichlet") == true) { + for (LO j = 0; j < blkPartSize; j++) { + if (pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = true; + break; + } } - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - columns.resize(realnnz); + } else { + isBoundary = true; + for (LO j = 0; j < blkPartSize; j++) { + if (!pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = false; + break; + } + } + } - RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); + // Merge rows of A + // The array indicesExtra contains local column node ids for the current local node "row" + if (!isBoundary) + MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation); + else + indicesExtra.push_back(row); + indices = indicesExtra; + numTotal += indices.size(); + + // add the local column node ids to the full columns array which + // contains the local column node ids for all local node rows + LO nnz = indices.size(), rownnz = 0; + for (LO colID = 0; colID < nnz; colID++) { + LO col = indices[colID]; + columns[realnnz++] = col; + rownnz++; + } + + if (rownnz == 1) { + // If the only element remaining after filtering is diagonal, mark node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for distinguishing isolated + // and boundary nodes in the aggregation algorithms + amalgBoundaryNodes[row] = true; + } + rows[row + 1] = realnnz; + } // for (LO row = 0; row < numRows; row++) + columns.resize(realnnz); - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + 
graph->SetBoundaryNodeMap(amalgBoundaryNodes); - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes - << " agglomerated Dirichlet nodes" << std::endl; - } + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); // full block size + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" << std::endl; } - } else if (algo == "distance laplacian") { - LO blkSize = A->GetFixedBlockSize(); - GO indexBase = A->getRowMap()->getIndexBase(); - // [*0*] : FIXME - // ap: somehow, if I move this line to [*1*], Belos throws an error - // I'm not sure what's going on. Do we always have to Get data, if we did - // DeclareInput for it? - // RCP Coords = Get< RCP >(currentLevel, "Coordinates"); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); // full block size + } - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + } else if (algo == "distance laplacian") { + LO blkSize = A->GetFixedBlockSize(); + GO indexBase = A->getRowMap()->getIndexBase(); + // [*0*] : FIXME + // ap: somehow, if I move this line to [*1*], Belos throws an error + // I'm not sure what's going on. Do we always have to Get data, if we did + // DeclareInput for it? + // RCP Coords = Get< RCP >(currentLevel, "Coordinates"); - if ( (blkSize == 1) && (threshold == STS::zero()) ) { - // Trivial case: scalar problem, no dropping. Can return original graph - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - graph->SetBoundaryNodeMap(pointBoundaryNodes); - graphType="unamalgamated"; - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < pointBoundaryNodes.size(); ++i) - if (pointBoundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size + // TODO the array one bigger than the number of local rows, and the last entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + + if ((blkSize == 1) && (threshold == STS::zero())) { + // Trivial case: scalar problem, no dropping. 
Can return original graph + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + graph->SetBoundaryNodeMap(pointBoundaryNodes); + graphType = "unamalgamated"; + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < pointBoundaryNodes.size(); ++i) + if (pointBoundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } - Set(currentLevel, "DofsPerNode", blkSize); - Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); + Set(currentLevel, "Graph", graph); + + } else { + // ap: We make quite a few assumptions here; general case may be a lot different, + // but much much harder to implement. We assume that: + // 1) all maps are standard maps, not strided maps + // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic + // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i + // + // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo, + // but as I totally don't understand that code, here is my solution + + // [*1*]: see [*0*] + + // Check that the number of local coordinates is consistent with the #rows in A + TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible, + "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ")."); + + const RCP colMap = A->getColMap(); + RCP uniqueMap, nonUniqueMap; + Array colTranslation; + if (blkSize == 1) { + uniqueMap = A->getRowMap(); + nonUniqueMap = A->getColMap(); + graphType = 
"unamalgamated"; } else { - // ap: We make quite a few assumptions here; general case may be a lot different, - // but much much harder to implement. We assume that: - // 1) all maps are standard maps, not strided maps - // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic - // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i - // - // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo, - // but as I totally don't understand that code, here is my solution - - // [*1*]: see [*0*] - - // Check that the number of local coordinates is consistent with the #rows in A - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible, - "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size ("<< blkSize <<")."); - - const RCP colMap = A->getColMap(); - RCP uniqueMap, nonUniqueMap; - Array colTranslation; - if (blkSize == 1) { - uniqueMap = A->getRowMap(); - nonUniqueMap = A->getColMap(); - graphType="unamalgamated"; - - } else { - uniqueMap = Coords->getMap(); - TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible, - "Different index bases for matrix and coordinates"); - - AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation); + uniqueMap = Coords->getMap(); + TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible, + "Different index bases for matrix and coordinates"); - graphType = "amalgamated"; - } - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); + AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation); - RCP ghostedCoords; - RCP ghostedLaplDiag; - Teuchos::ArrayRCP ghostedLaplDiagData; - if (threshold != STS::zero()) { - // Get 
ghost coordinates - RCP importer; - { - SubFactoryMonitor m1(*this, "Import construction", currentLevel); - if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) { - GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl; - importer = realA->getCrsGraph()->getImporter(); - } else { - GetOStream(Warnings0) << "Constructing new importer instance" << std::endl; - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - } - } //subtimer - ghostedCoords = Xpetra::MultiVectorFactory::Build(nonUniqueMap, Coords->getNumVectors()); - { + graphType = "amalgamated"; + } + LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); + + RCP ghostedCoords; + RCP ghostedLaplDiag; + Teuchos::ArrayRCP ghostedLaplDiagData; + if (threshold != STS::zero()) { + // Get ghost coordinates + RCP importer; + { + SubFactoryMonitor m1(*this, "Import construction", currentLevel); + if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) { + GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl; + importer = realA->getCrsGraph()->getImporter(); + } else { + GetOStream(Warnings0) << "Constructing new importer instance" << std::endl; + importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + } + } // subtimer + ghostedCoords = Xpetra::MultiVectorFactory::Build(nonUniqueMap, Coords->getNumVectors()); + { SubFactoryMonitor m1(*this, "Coordinate import", currentLevel); ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT); - } //subtimer + } // subtimer - // Construct Distance Laplacian diagonal - RCP localLaplDiag = VectorFactory::Build(uniqueMap); - Array indicesExtra; - Teuchos::Array> coordData; - if (threshold != STS::zero()) { - const size_t numVectors = ghostedCoords->getNumVectors(); - coordData.reserve(numVectors); - for (size_t j = 0; j < numVectors; j++) { - Teuchos::ArrayRCP tmpData=ghostedCoords->getData(j); - coordData.push_back(tmpData); - } + // Construct Distance Laplacian 
diagonal + RCP localLaplDiag = VectorFactory::Build(uniqueMap); + Array indicesExtra; + Teuchos::Array> coordData; + if (threshold != STS::zero()) { + const size_t numVectors = ghostedCoords->getNumVectors(); + coordData.reserve(numVectors); + for (size_t j = 0; j < numVectors; j++) { + Teuchos::ArrayRCP tmpData = ghostedCoords->getData(j); + coordData.push_back(tmpData); } - { + } + { SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel); ArrayRCP localLaplDiagData = localLaplDiag->getDataNonConst(0); for (LO row = 0; row < numRows; row++) { @@ -1144,26 +1119,24 @@ namespace MueLu { indices = indicesExtra; } - LO nnz = indices.size(); + LO nnz = indices.size(); bool haveAddedToDiag = false; for (LO colID = 0; colID < nnz; colID++) { const LO col = indices[colID]; if (row != col) { - if(use_dlap_weights == SINGLE_WEIGHTS) { + if (use_dlap_weights == SINGLE_WEIGHTS) { /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col, MueLu::Utilities::Distance2(coordData, row, col), MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col));*/ - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { - int block_id = row % interleaved_blocksize; + localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { + int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { + localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col); + } else { // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities::Distance2(coordData, row, col)); - localLaplDiagData[row] += 
STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(coordData, row, col); } haveAddedToDiag = true; } @@ -1173,67 +1146,67 @@ namespace MueLu { if (!haveAddedToDiag) localLaplDiagData[row] = STS::rmax(); } - } //subtimer - { + } // subtimer + { SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel); ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0); - } //subtimer + } // subtimer - } else { - GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl; - } + } else { + GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl; + } - // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian + // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian - // allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + // allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); #ifdef HAVE_MUELU_DEBUG - // DEBUGGING - for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666; + // DEBUGGING + for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666; #endif - // Extra array for if we're allowing symmetrization with cutting - ArrayRCP rows_stop; - bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric; - if(use_stop_array) - rows_stop.resize(numRows); - - const ArrayRCP amalgBoundaryNodes(numRows, false); + // Extra array for if we're allowing symmetrization with cutting + ArrayRCP rows_stop; + bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric; + if (use_stop_array) + 
rows_stop.resize(numRows); - LO realnnz = 0; - rows[0] = 0; + const ArrayRCP amalgBoundaryNodes(numRows, false); - Array indicesExtra; - { + LO realnnz = 0; + rows[0] = 0; + + Array indicesExtra; + { SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel); Teuchos::Array> coordData; if (threshold != STS::zero()) { const size_t numVectors = ghostedCoords->getNumVectors(); coordData.reserve(numVectors); for (size_t j = 0; j < numVectors; j++) { - Teuchos::ArrayRCP tmpData=ghostedCoords->getData(j); + Teuchos::ArrayRCP tmpData = ghostedCoords->getData(j); coordData.push_back(tmpData); } } - ArrayView vals;//CMS hackery + ArrayView vals; // CMS hackery for (LO row = 0; row < numRows; row++) { ArrayView indices; indicesExtra.resize(0); - bool isBoundary = false; + bool isBoundary = false; if (blkSize == 1) { - // ArrayView vals;//CMS uncomment + // ArrayView vals;//CMS uncomment A->getLocalRowView(row, indices, vals); - isBoundary = pointBoundaryNodes[row]; + isBoundary = pointBoundaryNodes[row]; } else { // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet for (LO j = 0; j < blkSize; j++) { - if (!pointBoundaryNodes[row*blkSize+j]) { + if (!pointBoundaryNodes[row * blkSize + j]) { isBoundary = false; break; } @@ -1250,17 +1223,16 @@ namespace MueLu { LO nnz = indices.size(), rownnz = 0; - if(use_stop_array) { - rows[row+1] = rows[row]+nnz; - realnnz = rows[row]; - } + if (use_stop_array) { + rows[row + 1] = rows[row] + nnz; + realnnz = rows[row]; + } if (threshold != STS::zero()) { // default if (distanceLaplacianAlgo == defaultAlgo) { - /* Standard Distance Laplacian */ + /* Standard Distance Laplacian */ for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; if (row == col) { @@ -1269,23 +1241,21 @@ namespace MueLu { continue; } - // We do not want the distance Laplacian aggregating boundary nodes - if(isBoundary) continue; + // We do not want the distance Laplacian aggregating boundary nodes + if (isBoundary) 
continue; SC laplVal; - if(use_dlap_weights == SINGLE_WEIGHTS) { - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { - int block_id = row % interleaved_blocksize; + if (use_dlap_weights == SINGLE_WEIGHTS) { + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { + int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col); + } else { + laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); } - real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]); - real_type aij = STS::magnitude(laplVal*laplVal); + real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]); + real_type aij = STS::magnitude(laplVal * laplVal); if (aij > aiiajj) { columns[realnnz++] = col; @@ -1295,8 +1265,8 @@ namespace MueLu { } } } else { - /* Cut Algorithm */ - using DropTol = Details::DropTol; + /* Cut Algorithm */ + using DropTol = Details::DropTol; std::vector drop_vec; drop_vec.reserve(nnz); const real_type zero = Teuchos::ScalarTraits::zero(); @@ -1304,31 +1274,28 @@ namespace MueLu { // find magnitudes for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; if (row == col) { - drop_vec.emplace_back( zero, one, colID, false); + drop_vec.emplace_back(zero, one, colID, false); continue; } - // We do not want the distance Laplacian aggregating boundary nodes - if(isBoundary) continue; + // We do not want the 
distance Laplacian aggregating boundary nodes + if (isBoundary) continue; SC laplVal; - if(use_dlap_weights == SINGLE_WEIGHTS) { - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { - int block_id = row % interleaved_blocksize; + if (use_dlap_weights == SINGLE_WEIGHTS) { + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { + int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col); + } else { + laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); } - real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]); - real_type aij = STS::magnitude(laplVal*laplVal); + real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]); + real_type aij = STS::magnitude(laplVal * laplVal); drop_vec.emplace_back(aij, aiiajj, colID, false); } @@ -1336,53 +1303,46 @@ namespace MueLu { const size_t n = drop_vec.size(); if (distanceLaplacianAlgo == unscaled_cut) { - - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - } - ); + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val > b.val; + }); bool drop = false; - for (size_t i=1; i realThreshold*b) { + auto a = x.val; + auto b = y.val; + if (a > realThreshold * b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << 
std::endl; + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; } #endif } } drop_vec[i].drop = drop; } - } - else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) { - - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val/a.diag > b.val/b.diag; - } - ); + } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) { + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val / a.diag > b.val / b.diag; + }); bool drop = false; - for (size_t i=1; i realThreshold*b) { + auto a = x.val / x.diag; + auto b = y.val / y.diag; + if (a > realThreshold * b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; + } #endif } } @@ -1390,30 +1350,27 @@ namespace MueLu { } } - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - } - ); + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.col < b.col; + }); - for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) { + for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal if (row == col) { columns[realnnz++] = col; rownnz++; - // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val); + // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val); continue; } if (!drop_vec[idxID].drop) { columns[realnnz++] = col; - // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); + // printf("(%d,%d) KEEP dlap = %6.4e matrix = 
%6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); rownnz++; } else { - // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); + // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); numDropped++; } } @@ -1421,13 +1378,13 @@ namespace MueLu { } else { // Skip laplace calculation and threshold comparison for zero threshold for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; + LO col = indices[colID]; columns[realnnz++] = col; rownnz++; } } - if ( rownnz == 1) { + if (rownnz == 1) { // If the only element remaining after filtering is diagonal, mark node as boundary // FIXME: this should really be replaced by the following // if (indices.size() == 1 && indices[0] == row) @@ -1437,584 +1394,575 @@ namespace MueLu { amalgBoundaryNodes[row] = true; } - if(use_stop_array) - rows_stop[row] = rownnz + rows[row]; - else - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - - } //subtimer - - if (use_stop_array) { - // Do symmetrization of the cut matrix - // NOTE: We assume nested row/column maps here - for (LO row = 0; row < numRows; row++) { - for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) { - LO col = columns[colidx]; - if(col >= numRows) continue; - - bool found = false; - for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) { - if (columns[t_col] == row) - found = true; - } - // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing - // into a Dirichlet unknown. In that case don't. 
- if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) { - LO new_idx = rows_stop[col]; - // printf("(%d,%d) SYMADD entry\n",col,row); - columns[new_idx] = row; - rows_stop[col]++; - numDropped--; - } - } - } - - // Condense everything down - LO current_start=0; - for (LO row = 0; row < numRows; row++) { - LO old_start = current_start; - for (LO col = rows[row]; col < rows_stop[row]; col++) { - if(current_start != col) { - columns[current_start] = columns[col]; - } - current_start++; - } - rows[row] = old_start; - } - rows[numRows] = realnnz = current_start; - - } - - columns.resize(realnnz); - - RCP graph; - { + if (use_stop_array) + rows_stop[row] = rownnz + rows[row]; + else + rows[row + 1] = realnnz; + } // for (LO row = 0; row < numRows; row++) + + } // subtimer + + if (use_stop_array) { + // Do symmetrization of the cut matrix + // NOTE: We assume nested row/column maps here + for (LO row = 0; row < numRows; row++) { + for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) { + LO col = columns[colidx]; + if (col >= numRows) continue; + + bool found = false; + for (LO t_col = rows[col]; !found && t_col < rows_stop[col]; t_col++) { + if (columns[t_col] == row) + found = true; + } + // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing + // into a Dirichlet unknown. In that case don't. 
+ if (!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col + 1]) { + LO new_idx = rows_stop[col]; + // printf("(%d,%d) SYMADD entry\n",col,row); + columns[new_idx] = row; + rows_stop[col]++; + numDropped--; + } + } + } + + // Condense everything down + LO current_start = 0; + for (LO row = 0; row < numRows; row++) { + LO old_start = current_start; + for (LO col = rows[row]; col < rows_stop[row]; col++) { + if (current_start != col) { + columns[current_start] = columns[col]; + } + current_start++; + } + rows[row] = old_start; + } + rows[numRows] = realnnz = current_start; + } + + columns.resize(realnnz); + + RCP graph; + { SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel); graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); graph->SetBoundaryNodeMap(amalgBoundaryNodes); - } //subtimer + } // subtimer - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes" - << " using threshold " << dirichletThreshold << std::endl; - } + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes" + << " using threshold " << dirichletThreshold << std::endl; + } + + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); + } + } + + if ((GetVerbLevel() & Statistics1) && 
!(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) { + RCP> comm = A->getRowMap()->getComm(); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)"; + GetOStream(Statistics1) << std::endl; + } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); + } else { + // what Tobias has implemented + + SC threshold = as(pL.get("aggregation: drop tol")); + // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + GetOStream(Runtime0) << "algorithm = \"" + << "failsafe" + << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + + RCP rowMap = A->getRowMap(); + RCP colMap = A->getColMap(); + + LO blockdim = 1; // block dim for fixed size blocks + GO indexBase = rowMap->getIndexBase(); // index base of maps + GO offset = 0; + + // 1) check for blocking/striding information + if (A->IsView("stridedMaps") && + Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { + Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) 
+ RCP strMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed."); + blockdim = strMap->getFixedBlockSize(); + offset = strMap->getOffset(); + oldView = A->SwitchToView(oldView); + GetOStream(Statistics1) << "CoalesceDropFactory::Build():" + << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl; + } else + GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; + + // 2) get row map for amalgamated matrix (graph of A) + // with same distribution over all procs as row map of A + RCP nodeMap = amalInfo->getNodeRowMap(); + GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl; + + // 3) create graph of amalgamated matrix + RCP crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim); + + LO numRows = A->getRowMap()->getLocalNumElements(); + LO numNodes = nodeMap->getLocalNumElements(); + const ArrayRCP amalgBoundaryNodes(numNodes, false); + const ArrayRCP numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node + bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid + + // 4) do amalgamation. 
generate graph of amalgamated matrix + // Note, this code is much more inefficient than the leightwight implementation + // Most of the work has already been done in the AmalgamationFactory + for (LO row = 0; row < numRows; row++) { + // get global DOF id + GO grid = rowMap->getGlobalElement(row); + + // reinitialize boolean helper variable + bIsDiagonalEntry = false; + + // translate grid to nodeid + GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase); + + size_t nnz = A->getNumEntriesInLocalRow(row); + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + A->getLocalRowView(row, indices, vals); + + RCP> cnodeIds = Teuchos::rcp(new std::vector); // global column block ids + LO realnnz = 0; + for (LO col = 0; col < Teuchos::as(nnz); col++) { + GO gcid = colMap->getGlobalElement(indices[col]); // global column id + + if (vals[col] != STS::zero()) { + GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase); + cnodeIds->push_back(cnodeId); + realnnz++; // increment number of nnz in matrix row + if (grid == gcid) bIsDiagonalEntry = true; } } - if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) { - RCP > comm = A->getRowMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; + if (realnnz == 1 && bIsDiagonalEntry == true) { + LO lNodeId = nodeMap->getLocalElement(nodeId); + numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId + if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes + 
amalgBoundaryNodes[lNodeId] = true; } - } else { - //what Tobias has implemented - - SC threshold = as(pL.get("aggregation: drop tol")); - //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - - RCP rowMap = A->getRowMap(); - RCP colMap = A->getColMap(); - - LO blockdim = 1; // block dim for fixed size blocks - GO indexBase = rowMap->getIndexBase(); // index base of maps - GO offset = 0; - - // 1) check for blocking/striding information - if(A->IsView("stridedMaps") && - Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) - RCP strMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed."); - blockdim = strMap->getFixedBlockSize(); - offset = strMap->getOffset(); - oldView = A->SwitchToView(oldView); - GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl; - } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. 
Use blockdim=1 with offset=0" << std::endl; - - // 2) get row map for amalgamated matrix (graph of A) - // with same distribution over all procs as row map of A - RCP nodeMap = amalInfo->getNodeRowMap(); - GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl; - - // 3) create graph of amalgamated matrix - RCP crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries()*blockdim); - - LO numRows = A->getRowMap()->getLocalNumElements(); - LO numNodes = nodeMap->getLocalNumElements(); - const ArrayRCP amalgBoundaryNodes(numNodes, false); - const ArrayRCP numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node - bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid - - // 4) do amalgamation. generate graph of amalgamated matrix - // Note, this code is much more inefficient than the leightwight implementation - // Most of the work has already been done in the AmalgamationFactory - for(LO row=0; rowgetGlobalElement(row); - - // reinitialize boolean helper variable - bIsDiagonalEntry = false; - - // translate grid to nodeid - GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase); - - size_t nnz = A->getNumEntriesInLocalRow(row); - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - A->getLocalRowView(row, indices, vals); - - RCP > cnodeIds = Teuchos::rcp(new std::vector); // global column block ids - LO realnnz = 0; - for(LO col=0; col(nnz); col++) { - GO gcid = colMap->getGlobalElement(indices[col]); // global column id - - if(vals[col]!=STS::zero()) { - GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase); - cnodeIds->push_back(cnodeId); - realnnz++; // increment number of nnz in matrix row - if (grid == gcid) bIsDiagonalEntry = true; - } - } + Teuchos::ArrayRCP arr_cnodeIds = Teuchos::arcp(cnodeIds); - 
if(realnnz == 1 && bIsDiagonalEntry == true) { - LO lNodeId = nodeMap->getLocalElement(nodeId); - numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId - if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes - amalgBoundaryNodes[lNodeId] = true; - } + if (arr_cnodeIds.size() > 0) + crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds()); + } + // fill matrix graph + crsGraph->fillComplete(nodeMap, nodeMap); - Teuchos::ArrayRCP arr_cnodeIds = Teuchos::arcp( cnodeIds ); + // 5) create MueLu Graph object + RCP graph = rcp(new Graph(crsGraph, "amalgamated graph of A")); - if(arr_cnodeIds.size() > 0 ) - crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds()); - } - // fill matrix graph - crsGraph->fillComplete(nodeMap,nodeMap); + // Detect and record rows that correspond to Dirichlet boundary conditions + graph->SetBoundaryNodeMap(amalgBoundaryNodes); - // 5) create MueLu Graph object - RCP graph = rcp(new Graph(crsGraph, "amalgamated graph of A")); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } - // Detect and record rows that correspond to Dirichlet boundary conditions - graph->SetBoundaryNodeMap(amalgBoundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" 
<< std::endl; - } + // 6) store results in Level + // graph->SetBoundaryNodeMap(gBoundaryNodeMap); + Set(currentLevel, "DofsPerNode", blockdim); + Set(currentLevel, "Graph", graph); - // 6) store results in Level - //graph->SetBoundaryNodeMap(gBoundaryNodeMap); - Set(currentLevel, "DofsPerNode", blockdim); - Set(currentLevel, "Graph", graph); + } // if (doExperimentalWrap) ... else ... - } //if (doExperimentalWrap) ... else ... +} // Build +template +void CoalesceDropFactory::MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const { + typedef typename ArrayView::size_type size_type; - } //Build + // extract striding information + LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() > -1) + blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + } - template - void CoalesceDropFactory::MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const { - typedef typename ArrayView::size_type size_type; + // count nonzero entries in all dof rows associated with node row + size_t nnz = 0, pos = 0; + for (LO j = 0; j < blkSize; j++) + nnz += A.getNumEntriesInLocalRow(row * blkSize + j); - // extract striding information - LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() > -1) - blkSize = 
Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + if (nnz == 0) { + cols.resize(0); + return; + } + + cols.resize(nnz); + + // loop over all local dof rows associated with local node "row" + ArrayView inds; + ArrayView vals; + for (LO j = 0; j < blkSize; j++) { + A.getLocalRowView(row * blkSize + j, inds, vals); + size_type numIndices = inds.size(); + + if (numIndices == 0) // skip empty dof rows + continue; + + // cols: stores all local node ids for current local node id "row" + cols[pos++] = translation[inds[0]]; + for (size_type k = 1; k < numIndices; k++) { + LO nodeID = translation[inds[k]]; + // Here we try to speed up the process by reducing the size of an array + // to sort. This works if the column nonzeros belonging to the same + // node are stored consequently. + if (nodeID != cols[pos - 1]) + cols[pos++] = nodeID; } + } + cols.resize(pos); + nnz = pos; + + // Sort and remove duplicates + std::sort(cols.begin(), cols.end()); + pos = 0; + for (size_t j = 1; j < nnz; j++) + if (cols[j] != cols[pos]) + cols[++pos] = cols[j]; + cols.resize(pos + 1); +} + +template +void CoalesceDropFactory::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const { + typedef typename ArrayView::size_type size_type; + typedef Teuchos::ScalarTraits STS; + + // extract striding information + LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() > -1) + blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + } - // count nonzero entries in all dof rows associated with node row - size_t nnz = 0, pos = 0; - for (LO j = 0; j < 
blkSize; j++) - nnz += A.getNumEntriesInLocalRow(row*blkSize+j); + // count nonzero entries in all dof rows associated with node row + size_t nnz = 0, pos = 0; + for (LO j = 0; j < blkSize; j++) + nnz += A.getNumEntriesInLocalRow(row * blkSize + j); - if (nnz == 0) { - cols.resize(0); - return; - } + if (nnz == 0) { + cols.resize(0); + return; + } - cols.resize(nnz); + cols.resize(nnz); - // loop over all local dof rows associated with local node "row" - ArrayView inds; - ArrayView vals; - for (LO j = 0; j < blkSize; j++) { - A.getLocalRowView(row*blkSize+j, inds, vals); - size_type numIndices = inds.size(); + // loop over all local dof rows associated with local node "row" + ArrayView inds; + ArrayView vals; + for (LO j = 0; j < blkSize; j++) { + A.getLocalRowView(row * blkSize + j, inds, vals); + size_type numIndices = inds.size(); + + if (numIndices == 0) // skip empty dof rows + continue; + + // cols: stores all local node ids for current local node id "row" + LO prevNodeID = -1; + for (size_type k = 0; k < numIndices; k++) { + LO dofID = inds[k]; + LO nodeID = translation[inds[k]]; - if (numIndices == 0) // skip empty dof rows - continue; + // we avoid a square root by using squared values + typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj| + typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]); + + // check dropping criterion + if (aij > aiiajj || (row * blkSize + j == dofID)) { + // accept entry in graph - // cols: stores all local node ids for current local node id "row" - cols[pos++] = translation[inds[0]]; - for (size_type k = 1; k < numIndices; k++) { - LO nodeID = translation[inds[k]]; // Here we try to speed up the process by reducing the size of an array // to sort. This works if the column nonzeros belonging to the same // node are stored consequently. 
- if (nodeID != cols[pos-1]) + if (nodeID != prevNodeID) { cols[pos++] = nodeID; + prevNodeID = nodeID; + } } } - cols.resize(pos); - nnz = pos; - - // Sort and remove duplicates - std::sort(cols.begin(), cols.end()); - pos = 0; - for (size_t j = 1; j < nnz; j++) - if (cols[j] != cols[pos]) - cols[++pos] = cols[j]; - cols.resize(pos+1); + } + cols.resize(pos); + nnz = pos; + + // Sort and remove duplicates + std::sort(cols.begin(), cols.end()); + pos = 0; + for (size_t j = 1; j < nnz; j++) + if (cols[j] != cols[pos]) + cols[++pos] = cols[j]; + cols.resize(pos + 1); + + return; +} + +template +Teuchos::RCP> CoalesceDropFactory::BlockDiagonalize(Level& currentLevel, const RCP& A, bool generate_matrix) const { + typedef Teuchos::ScalarTraits STS; + + const ParameterList& pL = GetParameterList(); + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + + RCP BlockNumber = Get>(currentLevel, "BlockNumber"); + RCP ghostedBlockNumber; + GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl; + + // Ghost the column block numbers if we need to + RCP importer = A->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; } - template - void CoalesceDropFactory::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const { - typedef typename ArrayView::size_type size_type; - typedef Teuchos::ScalarTraits STS; - - // extract striding information - LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block 
within the strided map - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() > -1) - blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); - } - - // count nonzero entries in all dof rows associated with node row - size_t nnz = 0, pos = 0; - for (LO j = 0; j < blkSize; j++) - nnz += A.getNumEntriesInLocalRow(row*blkSize+j); + // Accessors for block numbers + Teuchos::ArrayRCP row_block_number = BlockNumber->getData(0); + Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); + + // allocate space for the local graph + ArrayRCP rows_mat; + ArrayRCP rows_graph, columns; + ArrayRCP values; + RCP crs_matrix_wrap; + + if (generate_matrix) { + crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0)); + crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values); + } else { + rows_graph.resize(A->getLocalNumRows() + 1); + columns.resize(A->getLocalNumEntries()); + values.resize(A->getLocalNumEntries()); + } - if (nnz == 0) { - cols.resize(0); - return; - } + LO realnnz = 0; + GO numDropped = 0, numTotal = 0; + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + LO row_block = row_block_number[row]; + size_t nnz = A->getNumEntriesInLocalRow(row); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(row, indices, vals); - cols.resize(nnz); + LO rownnz = 0; + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + LO col_block = col_block_number[col]; - // loop over all local dof rows associated with local node "row" - ArrayView inds; - ArrayView vals; - for (LO j = 0; j < blkSize; j++) { - A.getLocalRowView(row*blkSize+j, inds, vals); - size_type numIndices = inds.size(); - - 
if (numIndices == 0) // skip empty dof rows - continue; - - // cols: stores all local node ids for current local node id "row" - LO prevNodeID = -1; - for (size_type k = 0; k < numIndices; k++) { - LO dofID = inds[k]; - LO nodeID = translation[inds[k]]; - - // we avoid a square root by using squared values - typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]); - - // check dropping criterion - if (aij > aiiajj || (row*blkSize+j == dofID)) { - // accept entry in graph - - // Here we try to speed up the process by reducing the size of an array - // to sort. This works if the column nonzeros belonging to the same - // node are stored consequently. - if (nodeID != prevNodeID) { - cols[pos++] = nodeID; - prevNodeID = nodeID; - } - } - } + if (row_block == col_block) { + if (generate_matrix) values[realnnz] = vals[colID]; + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - cols.resize(pos); - nnz = pos; + if (generate_matrix) + rows_mat[row + 1] = realnnz; + else + rows_graph[row + 1] = realnnz; + } - // Sort and remove duplicates - std::sort(cols.begin(), cols.end()); - pos = 0; - for (size_t j = 1; j < nnz; j++) - if (cols[j] != cols[pos]) - cols[++pos] = cols[j]; - cols.resize(pos+1); + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) 
+ Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - return; + if (!generate_matrix) { + // We can't resize an Arrayrcp and pass the checks for setAllValues + values.resize(realnnz); + columns.resize(realnnz); + } + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)"; + GetOStream(Statistics1) << std::endl; } + Set(currentLevel, "Filtering", true); + + if (generate_matrix) { + // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import + // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects + // here, which is legit, because we never use them anyway. 
+ crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values); + crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap()); + } else { + RCP graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + Set(currentLevel, "Graph", graph); + } + Set(currentLevel, "DofsPerNode", 1); + return crs_matrix_wrap; +} - template - Teuchos::RCP > CoalesceDropFactory::BlockDiagonalize(Level & currentLevel,const RCP& A,bool generate_matrix) const { - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); +template +void CoalesceDropFactory::BlockDiagonalizeGraph(const RCP& inputGraph, const RCP& ghostedBlockNumber, RCP& outputGraph, RCP& importer) const { + TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null."); + const ParameterList& pL = GetParameterList(); - RCP BlockNumber = Get >(currentLevel, "BlockNumber"); - RCP ghostedBlockNumber; - GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"<("aggregation: coloring: localize color graph"); - // Ghost the column block numbers if we need to - RCP importer = A->getCrsGraph()->getImporter(); - if(!importer.is_null()) { - SubFactoryMonitor m1(*this, "Block Number import", currentLevel); - ghostedBlockNumber= Xpetra::VectorFactory::Build(importer->getTargetMap()); - ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); - } - else { - ghostedBlockNumber = BlockNumber; - } + GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)"; + if (localizeColoringGraph) + 
GetOStream(Statistics1) << ", with localization" << std::endl; + else + GetOStream(Statistics1) << ", without localization" << std::endl; - // Accessors for block numbers - Teuchos::ArrayRCP row_block_number = BlockNumber->getData(0); - Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); - - // allocate space for the local graph - ArrayRCP rows_mat; - ArrayRCP rows_graph,columns; - ArrayRCP values; - RCP crs_matrix_wrap; - - if(generate_matrix) { - crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0)); - crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values); - } - else { - rows_graph.resize(A->getLocalNumRows()+1); - columns.resize(A->getLocalNumEntries()); - values.resize(A->getLocalNumEntries()); - } - - LO realnnz = 0; - GO numDropped = 0, numTotal = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - LO row_block = row_block_number[row]; - size_t nnz = A->getNumEntriesInLocalRow(row); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); + // Accessors for block numbers + Teuchos::ArrayRCP row_block_number = ghostedBlockNumber->getData(0); + Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); + + // allocate space for the local graph + ArrayRCP rows_mat; + ArrayRCP rows_graph, columns; + + rows_graph.resize(inputGraph->GetNodeNumVertices() + 1); + columns.resize(inputGraph->GetNodeNumEdges()); + + LO realnnz = 0; + GO numDropped = 0, numTotal = 0; + const LO numRows = Teuchos::as(inputGraph->GetDomainMap()->getLocalNumElements()); + if (localizeColoringGraph) { + for (LO row = 0; row < numRows; ++row) { + LO row_block = row_block_number[row]; + ArrayView indices = inputGraph->getNeighborVertices(row); LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; + for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { + LO col = indices[colID]; 
LO col_block = col_block_number[col]; - - if(row_block == col_block) { - if(generate_matrix) values[realnnz] = vals[colID]; + + if ((row_block == col_block) && (col < numRows)) { columns[realnnz++] = col; rownnz++; } else numDropped++; } - if(generate_matrix) rows_mat[row+1] = realnnz; - else rows_graph[row+1] = realnnz; + rows_graph[row + 1] = realnnz; } - - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) - Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - - - if(!generate_matrix) { - // We can't resize an Arrayrcp and pass the checks for setAllValues - values.resize(realnnz); - columns.resize(realnnz); - } - numTotal = A->getLocalNumEntries(); + } else { + // ghosting of boundary node map + Teuchos::ArrayRCP boundaryNodes = inputGraph->GetBoundaryNodeMap(); + auto boundaryNodesVector = Xpetra::VectorFactory::Build(inputGraph->GetDomainMap()); + for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++) + boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; + // Xpetra::IO::Write("boundary",*boundaryNodesVector); + auto boundaryColumnVector = Xpetra::VectorFactory::Build(inputGraph->GetImportMap()); + boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT); + auto boundaryColumn = boundaryColumnVector->getData(0); + + for (LO row = 0; row < numRows; ++row) { + LO row_block = row_block_number[row]; + ArrayView indices = inputGraph->getNeighborVertices(row); - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, 
numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; - } - - Set(currentLevel, "Filtering", true); + LO rownnz = 0; + for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { + LO col = indices[colID]; + LO col_block = col_block_number[col]; - if(generate_matrix) { - // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import - // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects - // here, which is legit, because we never use them anyway. - crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values); - crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap()); - } - else { - RCP graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - Set(currentLevel, "Graph", graph); + if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; + } + rows_graph[row + 1] = realnnz; } - - - Set(currentLevel, "DofsPerNode", 1); - return crs_matrix_wrap; } + columns.resize(realnnz); + numTotal = inputGraph->GetNodeNumEdges(); + + if (GetVerbLevel() & Statistics1) { + RCP> comm = inputGraph->GetDomainMap()->getComm(); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << 
numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)"; + GetOStream(Statistics1) << std::endl; + } - template - void CoalesceDropFactory::BlockDiagonalizeGraph(const RCP & inputGraph, const RCP & ghostedBlockNumber, RCP & outputGraph, RCP & importer) const { - - TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null."); - const ParameterList & pL = GetParameterList(); - - const bool localizeColoringGraph = pL.get("aggregation: coloring: localize color graph"); - - GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)"; - if (localizeColoringGraph) - GetOStream(Statistics1) << ", with localization" < row_block_number = ghostedBlockNumber->getData(0); - Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); - - // allocate space for the local graph - ArrayRCP rows_mat; - ArrayRCP rows_graph,columns; - - rows_graph.resize(inputGraph->GetNodeNumVertices()+1); - columns.resize(inputGraph->GetNodeNumEdges()); - - LO realnnz = 0; - GO numDropped = 0, numTotal = 0; - const LO numRows = Teuchos::as(inputGraph->GetDomainMap()->getLocalNumElements()); - if (localizeColoringGraph) { - - for (LO row = 0; row < numRows; ++row) { - LO row_block = row_block_number[row]; - ArrayView indices = inputGraph->getNeighborVertices(row); - - LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { - LO col = indices[colID]; - LO col_block = col_block_number[col]; - - if((row_block == col_block) && (col < numRows)) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows_graph[row+1] = realnnz; - } - } else { - // ghosting of boundary node map - Teuchos::ArrayRCP boundaryNodes = inputGraph->GetBoundaryNodeMap(); - auto boundaryNodesVector = Xpetra::VectorFactory::Build(inputGraph->GetDomainMap()); - for (size_t 
i=0; iGetNodeNumVertices(); i++) - boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; - // Xpetra::IO::Write("boundary",*boundaryNodesVector); - auto boundaryColumnVector = Xpetra::VectorFactory::Build(inputGraph->GetImportMap()); - boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT); - auto boundaryColumn = boundaryColumnVector->getData(0); - - for (LO row = 0; row < numRows; ++row) { - LO row_block = row_block_number[row]; - ArrayView indices = inputGraph->getNeighborVertices(row); - - LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { - LO col = indices[colID]; - LO col_block = col_block_number[col]; - - if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows_graph[row+1] = realnnz; - } - } - - columns.resize(realnnz); - numTotal = inputGraph->GetNodeNumEdges(); - - if (GetVerbLevel() & Statistics1) { - RCP > comm = inputGraph->GetDomainMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; - } - - if (localizeColoringGraph) { - outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); - outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); - } else { - TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra); + if (localizeColoringGraph) { + outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of 
A")); + outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); + } else { + TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra); #ifdef HAVE_XPETRA_TPETRA - auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); - - auto tpGraph = Xpetra::toTpetra(rcp_const_cast(outputGraph2->GetCrsGraph())); - auto sym = rcp(new Tpetra::CrsGraphTransposer(tpGraph)); - auto tpGraphSym = sym->symmetrize(); - - auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor - Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost()); - - auto rowsSym = tpGraphSym->getLocalRowPtrsHost(); - ArrayRCP rows_graphSym; - rows_graphSym.resize(rowsSym.size()); - for (size_t row = 0; row < rowsSym.size(); row++) - rows_graphSym[row] = rowsSym[row]; - outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A")); - outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); + auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); + + auto tpGraph = Xpetra::toTpetra(rcp_const_cast(outputGraph2->GetCrsGraph())); + auto sym = rcp(new Tpetra::CrsGraphTransposer(tpGraph)); + auto tpGraphSym = sym->symmetrize(); + + auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor + Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost()); + + auto rowsSym = tpGraphSym->getLocalRowPtrsHost(); + ArrayRCP rows_graphSym; + rows_graphSym.resize(rowsSym.size()); + for (size_t row = 0; row < rowsSym.size(); row++) + rows_graphSym[row] = rowsSym[row]; + outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), 
"block-diagonalized graph of A")); + outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); #endif - } - - } - - + } +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP +#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp index 45d2601b230d..3f314a916904 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp @@ -62,116 +62,114 @@ namespace MueLu { - /*! - @class CoalesceDropFactory_kokkos - @brief Factory for creating a graph based on a given matrix. - - Factory for creating graphs from matrices with entries selectively dropped. - This factory combines the functionality of CoalesceDropFactory and FilteredAFactory from the non-Kokkos - code path. - - For an in-depth discussion, see https://github.com/trilinos/Trilinos/issues/1676. - - ## Code paths ## - - Both the classic dropping strategy as well as a coordinate-based distance - laplacian method is implemented. For performance reasons there are four - distinctive code paths for the classical method: - - - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) - - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) - - DOFs per node > 1 withouth dropping - - DOFs per node > 1 with dropping - - Additionally there is a code path for the distance-laplacian mode. 
- - ## Input/output of CoalesceDropFactory_kokkos ## - - ### User parameters of CoalesceDropFactory_kokkos ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the operator A - UnAmalgamationInfo | Factory | null | | * | * | Generating factory of type AmalgamationFactory which generates the variable 'UnAmalgamationInfo'. Do not change the default unless you know what you are doing. - Coordinates | Factory | null | | * | (*) | Generating factory for variable 'Coordinates'. The coordinates are only needed if "distance laplacian" is chosen for the parameter "aggregation: drop scheme" - "aggregation: drop scheme" | std::string | "classical" | * | * | | Coalescing algorithm. You can choose either "classical" (=default) or "distance laplacian" - "aggregation: drop tol" | double | 0.0 | * | * | | Threshold parameter for dropping small entries - "aggregation: Dirichlet threshold" | double | 0.0 | * | * | | Threshold for determining whether entries are zero during Dirichlet row detection - "lightweight wrap" | bool | true | | * | | hidden switch between fast implementation based on MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph (for comparison). The user should not change the default value (=true) - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoalesceDropFactory_kokkos::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see CoalesceDropFactory_kokkos::DeclareInput). - - ### Variables provided by UncoupledAggregationFactory ### - - After CoalesceDropFactory_kokkos::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - Graph | CoalesceDropFactory_kokkos | Graph of matrix A - DofsPerNode | CoalesceDropFactory_kokkos | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. - - ## Amalgamation process ## - - The CoalesceDropFactory_kokkos is internally using the AmalgamationFactory - for amalgamating the dof-based maps to node-based maps. The - AmalgamationFactory creates the "UnAmalgamationInfo" container which - basically stores all the necessary information for translating dof based - data to node based data and vice versa. The container is used, since this - way the amalgamation is only done once and later reused by other factories. - - Of course, often one does not need the information from the - "UnAmalgamationInfo" container since the same information could be - extracted of the "Graph" or the map from the "Coordinates" vector. - However, there are also some situations (e.g. when doing rebalancing based - on HyperGraph partitioning without coordinate information) where one has - not access to a "Graph" or "Coordinates" variable. - */ - template - class CoalesceDropFactory_kokkos; - - template - class CoalesceDropFactory_kokkos > : public SingleLevelFactoryBase { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using range_type = Kokkos::RangePolicy; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - - private: - // For compatibility - using Node = node_type; +/*! 
+ @class CoalesceDropFactory_kokkos + @brief Factory for creating a graph based on a given matrix. + + Factory for creating graphs from matrices with entries selectively dropped. + This factory combines the functionality of CoalesceDropFactory and FilteredAFactory from the non-Kokkos + code path. + + For an in-depth discussion, see https://github.com/trilinos/Trilinos/issues/1676. + + ## Code paths ## + + Both the classic dropping strategy as well as a coordinate-based distance + laplacian method is implemented. For performance reasons there are four + distinctive code paths for the classical method: + + - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) + - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) + - DOFs per node > 1 withouth dropping + - DOFs per node > 1 with dropping + + Additionally there is a code path for the distance-laplacian mode. + + ## Input/output of CoalesceDropFactory_kokkos ## + + ### User parameters of CoalesceDropFactory_kokkos ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the operator A + UnAmalgamationInfo | Factory | null | | * | * | Generating factory of type AmalgamationFactory which generates the variable 'UnAmalgamationInfo'. Do not change the default unless you know what you are doing. + Coordinates | Factory | null | | * | (*) | Generating factory for variable 'Coordinates'. The coordinates are only needed if "distance laplacian" is chosen for the parameter "aggregation: drop scheme" + "aggregation: drop scheme" | std::string | "classical" | * | * | | Coalescing algorithm. 
You can choose either "classical" (=default) or "distance laplacian" + "aggregation: drop tol" | double | 0.0 | * | * | | Threshold parameter for dropping small entries + "aggregation: Dirichlet threshold" | double | 0.0 | * | * | | Threshold for determining whether entries are zero during Dirichlet row detection + "lightweight wrap" | bool | true | | * | | hidden switch between fast implementation based on MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph (for comparison). The user should not change the default value (=true) + + The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
+ The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoalesceDropFactory_kokkos::GetValidParameters).
+ The * in the @c requested column states that the data is requested as input with all dependencies (see CoalesceDropFactory_kokkos::DeclareInput). + + ### Variables provided by UncoupledAggregationFactory ### + + After CoalesceDropFactory_kokkos::Build the following data is available (if requested) + + Parameter | generated by | description + ----------|--------------|------------ + Graph | CoalesceDropFactory_kokkos | Graph of matrix A + DofsPerNode | CoalesceDropFactory_kokkos | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. + + ## Amalgamation process ## + + The CoalesceDropFactory_kokkos is internally using the AmalgamationFactory + for amalgamating the dof-based maps to node-based maps. The + AmalgamationFactory creates the "UnAmalgamationInfo" container which + basically stores all the necessary information for translating dof based + data to node based data and vice versa. The container is used, since this + way the amalgamation is only done once and later reused by other factories. + + Of course, often one does not need the information from the + "UnAmalgamationInfo" container since the same information could be + extracted of the "Graph" or the map from the "Coordinates" vector. + However, there are also some situations (e.g. when doing rebalancing based + on HyperGraph partitioning without coordinate information) where one has + not access to a "Graph" or "Coordinates" variable. 
+*/ +template +class CoalesceDropFactory_kokkos; + +template +class CoalesceDropFactory_kokkos > : public SingleLevelFactoryBase { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using range_type = Kokkos::RangePolicy; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + + private: + // For compatibility + using Node = node_type; #undef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + CoalesceDropFactory_kokkos() {} - //! Constructor - CoalesceDropFactory_kokkos() { } + //! Destructor + virtual ~CoalesceDropFactory_kokkos() {} - //! Destructor - virtual ~CoalesceDropFactory_kokkos() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level& currentLevel) const; + //@} - //@} + void Build(Level& currentLevel) const; +}; - void Build(Level& currentLevel) const; - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT -#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DECL_HPP +#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index e7e613aa1d93..f6aa0cca78fa 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -63,620 +63,609 @@ namespace MueLu { +namespace CoalesceDrop_Kokkos_Details { // anonymous + +template +class ScanFunctor { + public: + ScanFunctor(RowType rows_) + : 
rows(rows_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO i, LO& upd, const bool& final) const { + upd += rows(i); + if (final) + rows(i) = upd; + } - namespace CoalesceDrop_Kokkos_Details { // anonymous + private: + RowType rows; +}; + +template +class ClassicalDropFunctor { + private: + typedef typename GhostedViewType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) + magnitudeType eps; + + public: + ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) + : diag(ghostedDiag) + , eps(threshold) {} + + // Return true if we drop, false if not + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(LO row, LO col, SC val) const { + // We avoid square root by using squared values + auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + return (aij2 <= eps * eps * aiiajj); + } +}; + +template +class DistanceFunctor { + private: + typedef typename CoordsType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + public: + typedef SC value_type; + + public: + DistanceFunctor(CoordsType coords_) + : coords(coords_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType distance2(LO row, LO col) const { + SC d = ATS::zero(), s; + for (size_t j = 0; j < coords.extent(1); j++) { + s = coords(row, j) - coords(col, j); + d += s * s; + } + return ATS::magnitude(d); + } - template - class ScanFunctor { - public: - ScanFunctor(RowType rows_) : rows(rows_) { } + private: + CoordsType coords; +}; + +template +class DistanceLaplacianDropFunctor { + private: + typedef typename GhostedViewType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + public: + DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, 
DistanceFunctor distFunctor_, magnitudeType threshold) + : diag(ghostedLaplDiag) + , distFunctor(distFunctor_) + , eps(threshold) {} + + // Return true if we drop, false if not + KOKKOS_INLINE_FUNCTION + bool operator()(LO row, LO col, SC /* val */) const { + // We avoid square root by using squared values + + // We ignore incoming value of val as we operate on an auxiliary + // distance Laplacian matrix + typedef typename DistanceFunctor::value_type dSC; + typedef Kokkos::ArithTraits dATS; + auto fval = dATS::one() / distFunctor.distance2(row, col); + + auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 + + return (aij2 <= eps * eps * aiiajj); + } - KOKKOS_INLINE_FUNCTION - void operator()(const LO i, LO& upd, const bool& final) const { - upd += rows(i); - if (final) - rows(i) = upd; - } + private: + GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) + DistanceFunctor distFunctor; + magnitudeType eps; +}; + +template +class ScalarFunctor { + private: + typedef typename MatrixType::StaticCrsGraphType graph_type; + typedef typename graph_type::row_map_type rows_type; + typedef typename graph_type::entries_type cols_type; + typedef typename MatrixType::values_type vals_type; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::val_type impl_Scalar; + typedef Kokkos::ArithTraits impl_ATS; + typedef typename ATS::magnitudeType magnitudeType; + + public: + ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, + typename rows_type::non_const_type rows_, + typename cols_type::non_const_type colsAux_, + typename vals_type::non_const_type valsAux_, + bool reuseGraph_, bool lumping_, SC /* threshold_ */, + bool aggregationMayCreateDirichlet_) + : A(A_) + , bndNodes(bndNodes_) + , dropFunctor(dropFunctor_) + , rows(rows_) + , colsAux(colsAux_) + , valsAux(valsAux_) + , reuseGraph(reuseGraph_) + , 
lumping(lumping_) + , aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) { + rowsA = A.graph.row_map; + zero = impl_ATS::zero(); + } - private: - RowType rows; - }; - - template - class ClassicalDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - magnitudeType eps; - - public: - ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) : - diag(ghostedDiag), - eps(threshold) - { } - - // Return true if we drop, false if not - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(LO row, LO col, SC val) const { - // We avoid square root by using squared values - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 - - return (aij2 <= eps*eps * aiiajj); - } - }; - - template - class DistanceFunctor { - private: - typedef typename CoordsType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - typedef SC value_type; - - public: - DistanceFunctor(CoordsType coords_) : coords(coords_) { } - - KOKKOS_INLINE_FUNCTION - magnitudeType distance2(LO row, LO col) const { - SC d = ATS::zero(), s; - for (size_t j = 0; j < coords.extent(1); j++) { - s = coords(row,j) - coords(col,j); - d += s*s; - } - return ATS::magnitude(d); - } - private: - CoordsType coords; - }; - - template - class DistanceLaplacianDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) : - diag(ghostedLaplDiag), - distFunctor(distFunctor_), - eps(threshold) - { } - - // Return true if 
we drop, false if not - KOKKOS_INLINE_FUNCTION - bool operator()(LO row, LO col, SC /* val */) const { - // We avoid square root by using squared values - - // We ignore incoming value of val as we operate on an auxiliary - // distance Laplacian matrix - typedef typename DistanceFunctor::value_type dSC; - typedef Kokkos::ArithTraits dATS; - auto fval = dATS::one() / distFunctor.distance2(row, col); - - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 - - return (aij2 <= eps*eps * aiiajj); - } + KOKKOS_INLINE_FUNCTION + void operator()(const LO row, LO& nnz) const { + auto rowView = A.rowConst(row); + auto length = rowView.length; + auto offset = rowsA(row); - private: - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - DistanceFunctor distFunctor; - magnitudeType eps; - }; - - template - class ScalarFunctor { - private: - typedef typename MatrixType::StaticCrsGraphType graph_type; - typedef typename graph_type::row_map_type rows_type; - typedef typename graph_type::entries_type cols_type; - typedef typename MatrixType::values_type vals_type; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, - typename rows_type::non_const_type rows_, - typename cols_type::non_const_type colsAux_, - typename vals_type::non_const_type valsAux_, - bool reuseGraph_, bool lumping_, SC /* threshold_ */, - bool aggregationMayCreateDirichlet_ ) : - A(A_), - bndNodes(bndNodes_), - dropFunctor(dropFunctor_), - rows(rows_), - colsAux(colsAux_), - valsAux(valsAux_), - reuseGraph(reuseGraph_), - lumping(lumping_), - aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) - { - rowsA = A.graph.row_map; - zero = impl_ATS::zero(); 
- } + impl_Scalar diag = zero; + LO rownnz = 0; + LO diagID = -1; + for (decltype(length) colID = 0; colID < length; colID++) { + LO col = rowView.colidx(colID); + impl_Scalar val = rowView.value(colID); - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& nnz) const { - auto rowView = A.rowConst(row); - auto length = rowView.length; - auto offset = rowsA(row); - - impl_Scalar diag = zero; - LO rownnz = 0; - LO diagID = -1; - for (decltype(length) colID = 0; colID < length; colID++) { - LO col = rowView.colidx(colID); - impl_Scalar val = rowView.value (colID); - - if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { - colsAux(offset+rownnz) = col; - - LO valID = (reuseGraph ? colID : rownnz); - valsAux(offset+valID) = val; - if (row == col) - diagID = valID; - - rownnz++; - - } else { - // Rewrite with zeros (needed for reuseGraph) - valsAux(offset+colID) = zero; - diag += val; - } - } - // How to assert on the device? - // assert(diagIndex != -1); - rows(row+1) = rownnz; - // if (lumping && diagID != -1) { - if (lumping) { - // Add diag to the diagonal - - // NOTE_KOKKOS: valsAux was allocated with - // ViewAllocateWithoutInitializing. This is not a problem here - // because we explicitly set this value above. - valsAux(offset+diagID) += diag; - } + if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { + colsAux(offset + rownnz) = col; - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); + LO valID = (reuseGraph ? 
colID : rownnz); + valsAux(offset + valID) = val; + if (row == col) + diagID = valID; - nnz += rownnz; - } + rownnz++; - private: - MatrixType A; - BndViewType bndNodes; - DropFunctorType dropFunctor; - - rows_type rowsA; - - typename rows_type::non_const_type rows; - typename cols_type::non_const_type colsAux; - typename vals_type::non_const_type valsAux; - - bool reuseGraph; - bool lumping; - bool aggregationMayCreateDirichlet; - impl_Scalar zero; - }; - - // collect number nonzeros of blkSize rows in nnz_(row+1) - template - class Stage1aVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - public: - Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) : - kokkosMatrix(kokkosMatrix_), - nnz(nnz_), - blkSize(blkSize_) { } - - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& totalnnz) const { - - // the following code is more or less what MergeRows is doing - // count nonzero entries in all dof rows associated with node row - LO nodeRowMaxNonZeros = 0; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(row * blkSize + j); - nodeRowMaxNonZeros += rowView.length; - } - nnz(row + 1) = nodeRowMaxNonZeros; - totalnnz += nodeRowMaxNonZeros; + } else { + // Rewrite with zeros (needed for reuseGraph) + valsAux(offset + colID) = zero; + diag += val; } + } + // How to assert on the device? + // assert(diagIndex != -1); + rows(row + 1) = rownnz; + // if (lumping && diagID != -1) { + if (lumping) { + // Add diag to the diagonal + + // NOTE_KOKKOS: valsAux was allocated with + // ViewAllocateWithoutInitializing. This is not a problem here + // because we explicitly set this value above. 
+ valsAux(offset + diagID) += diag; + } + // If the only element remaining after filtering is diagonal, mark node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for distinguishing isolated + // and boundary nodes in the aggregation algorithms + bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType nnz; //< View containing number of nonzeros for current row - blkSizeType blkSize; //< block size (or partial block size in strided maps) - }; - - - // build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix - // sort column ids - // translate them into (unique) node ids - // count the node column ids per node row - template - class Stage1bcVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType coldofnnz; //< view containing start and stop indices for subviews - blkSizeType blkSize; //< block size (or partial block size in strided maps) - ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) - Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id - NnzType colnodennz; //< view containing number of column nodes for each node row - BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. - BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. 
- boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) - - public: - Stage1bcVectorFunctor(MatrixType kokkosMatrix_, - NnzType coldofnnz_, - blkSizeType blkSize_, - ColDofType coldofs_, - Dof2NodeTranslationType dof2node_, - NnzType colnodennz_, - BdryNodeTypeConst dirichletdof_, - BdryNodeType bdrynode_, - boolType usegreedydirichlet_) : - kokkosMatrix(kokkosMatrix_), - coldofnnz(coldofnnz_), - blkSize(blkSize_), - coldofs(coldofs_), - dof2node(dof2node_), - colnodennz(colnodennz_), - dirichletdof(dirichletdof_), - bdrynode(bdrynode_), - usegreedydirichlet(usegreedydirichlet_) { - } + nnz += rownnz; + } - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode, LO& nnz) const { + private: + MatrixType A; + BndViewType bndNodes; + DropFunctorType dropFunctor; + + rows_type rowsA; + + typename rows_type::non_const_type rows; + typename cols_type::non_const_type colsAux; + typename vals_type::non_const_type valsAux; + + bool reuseGraph; + bool lumping; + bool aggregationMayCreateDirichlet; + impl_Scalar zero; +}; + +// collect number nonzeros of blkSize rows in nnz_(row+1) +template +class Stage1aVectorFunctor { + private: + typedef typename MatrixType::ordinal_type LO; + + public: + Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) + : kokkosMatrix(kokkosMatrix_) + , nnz(nnz_) + , blkSize(blkSize_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO row, LO& totalnnz) const { + // the following code is more or less what MergeRows is doing + // count nonzero entries in all dof rows associated with node row + LO nodeRowMaxNonZeros = 0; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(row * blkSize + j); + nodeRowMaxNonZeros += rowView.length; + } + nnz(row + 1) = nodeRowMaxNonZeros; + totalnnz += nodeRowMaxNonZeros; + } - LO pos = coldofnnz(rowNode); - if( 
usegreedydirichlet ){ - bdrynode(rowNode) = false; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is Dirichlet - if( dirichletdof(rowNode * blkSize + j) ) - bdrynode(rowNode) = true; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos ++; - } - } - }else{ + private: + MatrixType kokkosMatrix; //< local matrix part + NnzType nnz; //< View containing number of nonzeros for current row + blkSizeType blkSize; //< block size (or partial block size in strided maps) +}; + +// build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix +// sort column ids +// translate them into (unique) node ids +// count the node column ids per node row +template +class Stage1bcVectorFunctor { + private: + typedef typename MatrixType::ordinal_type LO; + + private: + MatrixType kokkosMatrix; //< local matrix part + NnzType coldofnnz; //< view containing start and stop indices for subviews + blkSizeType blkSize; //< block size (or partial block size in strided maps) + ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) + Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id + NnzType colnodennz; //< view containing number of column nodes for each node row + BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. + BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. 
+ boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) + + public: + Stage1bcVectorFunctor(MatrixType kokkosMatrix_, + NnzType coldofnnz_, + blkSizeType blkSize_, + ColDofType coldofs_, + Dof2NodeTranslationType dof2node_, + NnzType colnodennz_, + BdryNodeTypeConst dirichletdof_, + BdryNodeType bdrynode_, + boolType usegreedydirichlet_) + : kokkosMatrix(kokkosMatrix_) + , coldofnnz(coldofnnz_) + , blkSize(blkSize_) + , coldofs(coldofs_) + , dof2node(dof2node_) + , colnodennz(colnodennz_) + , dirichletdof(dirichletdof_) + , bdrynode(bdrynode_) + , usegreedydirichlet(usegreedydirichlet_) { + } + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowNode, LO& nnz) const { + LO pos = coldofnnz(rowNode); + if (usegreedydirichlet) { + bdrynode(rowNode) = false; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(rowNode * blkSize + j); + auto numIndices = rowView.length; + + // if any dof in the node is Dirichlet + if (dirichletdof(rowNode * blkSize + j)) bdrynode(rowNode) = true; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is not Dirichlet - if( dirichletdof(rowNode * blkSize + j) == false ) - bdrynode(rowNode) = false; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos ++; - } - } - } - // sort coldofs - LO begin = coldofnnz(rowNode); - LO end = coldofnnz(rowNode+1); - LO n = end - begin; - for (LO i = 0; i < (n-1); i++) { - for (LO j = 0; j < (n-i-1); j++) { - if (coldofs(j+begin) > coldofs(j+begin+1)) { - LO temp = coldofs(j+begin); - coldofs(j+begin) = coldofs(j+begin+1); - coldofs(j+begin+1) = temp; - } - } + for (decltype(numIndices) k = 0; k < numIndices; k++) { + auto dofID = rowView.colidx(k); + coldofs(pos) = 
dofID; + pos++; } - size_t cnt = 0; - LO lastNodeID = -1; - for (LO i = 0; i < n; i++) { - LO dofID = coldofs(begin + i); - LO nodeID = dof2node(dofID); - if(nodeID != lastNodeID) { - lastNodeID = nodeID; - coldofs(begin+cnt) = nodeID; - cnt++; - } - } - colnodennz(rowNode+1) = cnt; - nnz += cnt; - } - - }; - - // fill column node id view - template - class Stage1dVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - typedef typename MatrixType::value_type SC; - - private: - ColDofType coldofs; //< view containing mixed node and dof indices (only input) - ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) - ColNodeType colnodes; //< view containing the local node ids associated with columns - ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews - - public: - Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) : - coldofs(coldofs_), - coldofnnz(coldofnnz_), - colnodes(colnodes_), - colnodennz(colnodennz_) { } + } else { + bdrynode(rowNode) = true; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(rowNode * blkSize + j); + auto numIndices = rowView.length; - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode) const { - auto dofbegin = coldofnnz(rowNode); - auto nodebegin = colnodennz(rowNode); - auto nodeend = colnodennz(rowNode+1); - auto n = nodeend - nodebegin; + // if any dof in the node is not Dirichlet + if (dirichletdof(rowNode * blkSize + j) == false) + bdrynode(rowNode) = false; - for (decltype(nodebegin) i = 0; i < n; i++) { - colnodes(nodebegin + i) = coldofs(dofbegin + i); + for (decltype(numIndices) k = 0; k < numIndices; k++) { + auto dofID = rowView.colidx(k); + coldofs(pos) = dofID; + pos++; } } - }; - + } - } // namespace + // sort coldofs + LO begin = coldofnnz(rowNode); + LO end = coldofnnz(rowNode + 1); + LO n = end - begin; + for (LO i = 0; i < (n - 1); i++) { 
+ for (LO j = 0; j < (n - i - 1); j++) { + if (coldofs(j + begin) > coldofs(j + begin + 1)) { + LO temp = coldofs(j + begin); + coldofs(j + begin) = coldofs(j + begin + 1); + coldofs(j + begin + 1) = temp; + } + } + } + size_t cnt = 0; + LO lastNodeID = -1; + for (LO i = 0; i < n; i++) { + LO dofID = coldofs(begin + i); + LO nodeID = dof2node(dofID); + if (nodeID != lastNodeID) { + lastNodeID = nodeID; + coldofs(begin + cnt) = nodeID; + cnt++; + } + } + colnodennz(rowNode + 1) = cnt; + nnz += cnt; + } +}; + +// fill column node id view +template +class Stage1dVectorFunctor { + private: + typedef typename MatrixType::ordinal_type LO; + typedef typename MatrixType::value_type SC; + + private: + ColDofType coldofs; //< view containing mixed node and dof indices (only input) + ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) + ColNodeType colnodes; //< view containing the local node ids associated with columns + ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews + + public: + Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) + : coldofs(coldofs_) + , coldofnnz(coldofnnz_) + , colnodes(colnodes_) + , colnodennz(colnodennz_) { + } - template - RCP CoalesceDropFactory_kokkos>::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowNode) const { + auto dofbegin = coldofnnz(rowNode); + auto nodebegin = colnodennz(rowNode); + auto nodeend = colnodennz(rowNode + 1); + auto n = nodeend - nodebegin; -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - 
SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - validParamList->getEntry("aggregation: drop scheme").setValidator( - rcp(new validatorType(Teuchos::tuple("classical", "distance laplacian"), "aggregation: drop scheme"))); + for (decltype(nodebegin) i = 0; i < n; i++) { + colnodes(nodebegin + i) = coldofs(dofbegin + i); } -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - - return validParamList; } +}; + +} // namespace CoalesceDrop_Kokkos_Details - template - void CoalesceDropFactory_kokkos>::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); +template +RCP CoalesceDropFactory_kokkos>::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - const ParameterList& pL = GetParameterList(); - if (pL.get("aggregation: drop scheme") == "distance laplacian") - Input(currentLevel, "Coordinates"); +#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: drop scheme"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); + SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); + { + typedef 
Teuchos::StringToIntegralParameterEntryValidator validatorType; + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("classical", "distance laplacian"), "aggregation: drop scheme"))); } +#undef SET_VALID_ENTRY + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); + validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - template - void CoalesceDropFactory_kokkos>:: - Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + return validParamList; +} - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType MT; - const MT zero = Teuchos::ScalarTraits::zero(); +template +void CoalesceDropFactory_kokkos>::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); - auto A = Get< RCP >(currentLevel, "A"); + const ParameterList& pL = GetParameterList(); + if (pL.get("aggregation: drop scheme") == "distance laplacian") + Input(currentLevel, "Coordinates"); +} +template +void CoalesceDropFactory_kokkos>:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. - blkSize is the number of storage blocks that must kept together during the amalgamation process. + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MT; + const MT zero = Teuchos::ScalarTraits::zero(); - Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: + auto A = Get>(currentLevel, "A"); - numPDEs = blkSize * storageblocksize. - - If numPDEs==1 - Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1 - No other values makes sense. 
+ /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. + blkSize is the number of storage blocks that must kept together during the amalgamation process. - If numPDEs>1 - If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs. - If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n. - Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested. - */ - - TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); - LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: - auto amalInfo = Get< RCP >(currentLevel, "UnAmalgamationInfo"); + numPDEs = blkSize * storageblocksize. - const ParameterList& pL = GetParameterList(); + If numPDEs==1 + Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1 + No other values makes sense. - // Sanity Checking: ML drop tol scaling is not supported in UncoupledAggregation_Kokkos - TEUCHOS_TEST_FOR_EXCEPTION( pL.get("aggregation: use ml scaling of drop tol"),std::invalid_argument,"Option: 'aggregation: use ml scaling of drop tol' is not supported in the Kokkos version of CoalesceDroPFactory"); + If numPDEs>1 + If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs. + If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n. + Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested. 
+ */ + TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); + LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + auto amalInfo = Get>(currentLevel, "UnAmalgamationInfo"); - std::string algo = pL.get("aggregation: drop scheme"); + const ParameterList& pL = GetParameterList(); - double threshold = pL.get("aggregation: drop tol"); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold - << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + // Sanity Checking: ML drop tol scaling is not supported in UncoupledAggregation_Kokkos + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: use ml scaling of drop tol"), std::invalid_argument, "Option: 'aggregation: use ml scaling of drop tol' is not supported in the Kokkos version of CoalesceDroPFactory"); - const typename STS::magnitudeType dirichletThreshold = - STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + std::string algo = pL.get("aggregation: drop scheme"); - GO numDropped = 0, numTotal = 0; + double threshold = pL.get("aggregation: drop tol"); + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - RCP graph; - LO dofsPerNode = -1; + const typename STS::magnitudeType dirichletThreshold = + STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; - boundary_nodes_type boundaryNodes; + GO numDropped = 0, numTotal = 0; - RCP filteredA; - if (blkSize == 1 && threshold == zero) { - // Scalar problem without dropping + RCP graph; + LO dofsPerNode = -1; - // Detect and record rows that correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + typedef typename 
LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; + boundary_nodes_type boundaryNodes; - // Trivial LWGraph construction - graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + RCP filteredA; + if (blkSize == 1 && threshold == zero) { + // Scalar problem without dropping - numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; + // Detect and record rows that correspond to Dirichlet boundary conditions + boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - filteredA = A; + // Trivial LWGraph construction + graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - } else if (blkSize == 1 && threshold != zero) { - // Scalar problem with dropping + numTotal = A->getLocalNumEntries(); + dofsPerNode = 1; - typedef typename Matrix::local_matrix_type local_matrix_type; - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; - typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; - typedef typename local_matrix_type::values_type::non_const_type vals_type; + filteredA = A; - LO numRows = A->getLocalNumRows(); - local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); - auto nnzA = kokkosMatrix.nnz(); - auto rowsA = kokkosMatrix.graph.row_map; + } else if (blkSize == 1 && threshold != zero) { + // Scalar problem with dropping + typedef typename Matrix::local_matrix_type local_matrix_type; + typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; + typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; + typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; + typedef typename 
local_matrix_type::values_type::non_const_type vals_type; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; + LO numRows = A->getLocalNumRows(); + local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); + auto nnzA = kokkosMatrix.nnz(); + auto rowsA = kokkosMatrix.graph.row_map; - bool reuseGraph = pL.get("filtered matrix: reuse graph"); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::val_type impl_Scalar; + typedef Kokkos::ArithTraits impl_ATS; - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + bool reuseGraph = pL.get("filtered matrix: reuse graph"); + bool lumping = pL.get("filtered matrix: use lumping"); + if (lumping) + GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value - rows_type rows ("FA_rows", numRows+1); - cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); - vals_type valsAux; - if (reuseGraph) { - SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); + const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); - // Share graph with the original matrix - filteredA = MatrixFactory::Build(A->getCrsGraph()); + // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value + rows_type rows("FA_rows", numRows + 1); + cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); + vals_type valsAux; + if (reuseGraph) { + SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); - // Do a no-op fill-complete - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal Changes", true); - filteredA->fillComplete(fillCompleteParams); + // Share graph with the 
original matrix + filteredA = MatrixFactory::Build(A->getCrsGraph()); - // No need to reuseFill, just modify in place - valsAux = filteredA->getLocalMatrixDevice().values; + // Do a no-op fill-complete + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + filteredA->fillComplete(fillCompleteParams); - } else { - // Need an extra array to compress - valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); - } + // No need to reuseFill, just modify in place + valsAux = filteredA->getLocalMatrixDevice().values; - typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); + } else { + // Need an extra array to compress + valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); + } - LO nnzFA = 0; - { - if (algo == "classical") { - // Construct overlapped matrix diagonal - RCP ghostedDiag; - { - kokkosMatrix = local_matrix_type(); - SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); - ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); - kokkosMatrix=A->getLocalMatrixDevice(); - } + typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); + LO nnzFA = 0; + { + if (algo == "classical") { + // Construct overlapped matrix diagonal + RCP ghostedDiag; + { + kokkosMatrix = local_matrix_type(); + SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); + ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); + kokkosMatrix = A->getLocalMatrixDevice(); + } - auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + // Filter out entries + { + SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); - 
CoalesceDrop_Kokkos_Details::ScalarFunctor + auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + + CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); + CoalesceDrop_Kokkos_Details::ScalarFunctor scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0,numRows), - scalarFunctor, nnzFA); - } + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), + scalarFunctor, nnzFA); + } - } else if (algo == "distance laplacian") { - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> doubleMultiVector; - auto coords = Get >(currentLevel, "Coordinates"); + } else if (algo == "distance laplacian") { + typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> doubleMultiVector; + auto coords = Get>(currentLevel, "Coordinates"); - auto uniqueMap = A->getRowMap(); - auto nonUniqueMap = A->getColMap(); + auto uniqueMap = A->getRowMap(); + auto nonUniqueMap = A->getColMap(); - // Construct ghosted coordinates - RCP importer; - { - SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - } - RCP ghostedCoords; - { - SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, coords->getNumVectors()); - ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); - } + // Construct ghosted coordinates + RCP importer; + { + SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); + importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + } + RCP ghostedCoords; + { + SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); + ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, 
GO, NO>::Build(nonUniqueMap, coords->getNumVectors()); + ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); + } - auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); - CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); + auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); + CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); - // Construct Laplacian diagonal - RCP localLaplDiag; - { - SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); + // Construct Laplacian diagonal + RCP localLaplDiag; + { + SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); - localLaplDiag = VectorFactory::Build(uniqueMap); + localLaplDiag = VectorFactory::Build(uniqueMap); - auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); - auto kokkosGraph = kokkosMatrix.graph; + auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto kokkosGraph = kokkosMatrix.graph; - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0,numRows), + Kokkos::parallel_for( + "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0, numRows), KOKKOS_LAMBDA(const LO row) { auto rowView = kokkosGraph.rowConst(row); auto length = rowView.length; @@ -685,264 +674,267 @@ namespace MueLu { for (decltype(length) colID = 0; colID < length; colID++) { auto col = rowView(colID); if (row != col) - d += impl_ATS::one()/distFunctor.distance2(row, col); + d += impl_ATS::one() / distFunctor.distance2(row, col); } - localLaplDiagView(row,0) = d; + localLaplDiagView(row, 0) = d; }); - } - - // Construct ghosted Laplacian diagonal - RCP ghostedLaplDiag; - { - SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); - ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); - 
ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor - dropFunctor(ghostedLaplDiagView, distFunctor, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); - - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0,numRows), - scalarFunctor, nnzFA); - } } + // Construct ghosted Laplacian diagonal + RCP ghostedLaplDiag; + { + SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); + ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); + ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); + } + + // Filter out entries + { + SubFactoryMonitor m2(*this, "MainLoop", currentLevel); + + auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + + CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor + dropFunctor(ghostedLaplDiagView, distFunctor, threshold); + CoalesceDrop_Kokkos_Details::ScalarFunctor + scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); + + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), + scalarFunctor, nnzFA); + } } - numDropped = nnzA - nnzFA; + } + numDropped = nnzA - nnzFA; - boundaryNodes = bndNodes; + boundaryNodes = bndNodes; - { - SubFactoryMonitor m2(*this, "CompressRows", currentLevel); + { + SubFactoryMonitor m2(*this, "CompressRows", currentLevel); - // parallel_scan (exclusive) - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0,numRows+1), + // parallel_scan (exclusive) + 
Kokkos::parallel_scan( + "MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0, numRows + 1), KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { update += rows(i); if (final_pass) rows(i) = update; }); - } + } - // Compress cols (and optionally vals) - // We use a trick here: we moved all remaining elements to the beginning - // of the original row in the main loop, so we don't need to check for - // INVALID here, and just stop when achieving the new number of elements - // per row. - cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); - vals_type vals; - if (reuseGraph) { - GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; - // Only compress cols - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - Kokkos::parallel_for("MueLu:TentativePF:Build:compress_cols", range_type(0,numRows), + // Compress cols (and optionally vals) + // We use a trick here: we moved all remaining elements to the beginning + // of the original row in the main loop, so we don't need to check for + // INVALID here, and just stop when achieving the new number of elements + // per row. + cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); + vals_type vals; + if (reuseGraph) { + GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; + // Only compress cols + SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); + + Kokkos::parallel_for( + "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), KOKKOS_LAMBDA(const LO i) { // Is there Kokkos memcpy? 
- LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i+1) - rows(i); + LO rowStart = rows(i); + LO rowAStart = rowsA(i); + size_t rownnz = rows(i + 1) - rows(i); for (size_t j = 0; j < rownnz; j++) - cols(rowStart+j) = colsAux(rowAStart+j); + cols(rowStart + j) = colsAux(rowAStart + j); }); - } else { - // Compress cols and vals - GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); + } else { + // Compress cols and vals + GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; + SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); + vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); - Kokkos::parallel_for("MueLu:TentativePF:Build:compress_cols", range_type(0,numRows), + Kokkos::parallel_for( + "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), KOKKOS_LAMBDA(const LO i) { - LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i+1) - rows(i); + LO rowStart = rows(i); + LO rowAStart = rowsA(i); + size_t rownnz = rows(i + 1) - rows(i); for (size_t j = 0; j < rownnz; j++) { - cols(rowStart+j) = colsAux(rowAStart+j); - vals(rowStart+j) = valsAux(rowAStart+j); + cols(rowStart + j) = colsAux(rowAStart + j); + vals(rowStart + j) = valsAux(rowAStart + j); } }); - } + } - kokkos_graph_type kokkosGraph(cols, rows); + kokkos_graph_type kokkosGraph(cols, rows); - { - SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); + { + SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); - graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A")); - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - } + graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), 
A->getColMap(), "filtered graph of A")); + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + } - numTotal = A->getLocalNumEntries(); + numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; + dofsPerNode = 1; - if (!reuseGraph) { - SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); + if (!reuseGraph) { + SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); - local_matrix_type localFA = local_matrix_type("A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); - auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), - A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); - filteredA = rcp(new CrsMatrixWrap(filteredACrs)); - } + local_matrix_type localFA = local_matrix_type("A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); + auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), + A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); + filteredA = rcp(new CrsMatrixWrap(filteredACrs)); + } - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - if (pL.get("filtered matrix: reuse eigenvalue")) { - // Reuse max eigenvalue from A - // It is unclear what eigenvalue is the best for the smoothing, but we already may have - // the D^{-1}A estimate in A, may as well use it. - // NOTE: ML does that too - filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); - } else { - filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } + if (pL.get("filtered matrix: reuse eigenvalue")) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we already may have + // the D^{-1}A estimate in A, may as well use it. 
+ // NOTE: ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); + } else { + filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } - } else if (blkSize > 1 && threshold == zero) { - // Case 3: block problem without filtering - // - // FIXME_KOKKOS: this code is completely unoptimized. It really should do - // a very simple thing: merge rows and produce nodal graph. But the code - // seems very complicated. Can we do better? - - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); - - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - - // build a node row map (uniqueMap = non-overlapping) and a node column map - // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation - // stored in the AmalgamationInfo class container contain the local node id - // given a local dof id. The data is calculated in the AmalgamationFactory and - // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) - const RCP uniqueMap = amalInfo->getNodeRowMap(); - const RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? 
- Array colTranslationArray = *(amalInfo->getColTranslation()); - - Kokkos::View - rowTranslationView(rowTranslationArray.getRawPtr(),rowTranslationArray.size() ); - Kokkos::View - colTranslationView(colTranslationArray.getRawPtr(),colTranslationArray.size() ); - - // get number of local nodes - LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); - typedef typename Kokkos::View id_translation_type; - id_translation_type rowTranslation("dofId2nodeId",rowTranslationArray.size()); - id_translation_type colTranslation("ov_dofId2nodeId",colTranslationArray.size()); - Kokkos::deep_copy(rowTranslation, rowTranslationView); - Kokkos::deep_copy(colTranslation, colTranslationView); - - // extract striding information - blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map - LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) - if(A->IsView("stridedMaps") == true) { - const RCP myMap = A->getRowMap("stridedMaps"); - const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } - auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - - // - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type row_map_type; - //typedef typename row_map_type::HostMirror row_map_type_h; - typedef typename kokkos_graph_type::entries_type entries_type; - - // Stage 1c: get number of dof-nonzeros per blkSize node rows - typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); - LO numDofCols = 0; - 
CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols); - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor); + } else if (blkSize > 1 && threshold == zero) { + // Case 3: block problem without filtering + // + // FIXME_KOKKOS: this code is completely unoptimized. It really should do + // a very simple thing: merge rows and produce nodal graph. But the code + // seems very complicated. Can we do better? + + TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); + + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + + // build a node row map (uniqueMap = non-overlapping) and a node column map + // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation + // stored in the AmalgamationInfo class container contain the local node id + // given a local dof id. The data is calculated in the AmalgamationFactory and + // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) + const RCP uniqueMap = amalInfo->getNodeRowMap(); + const RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? 
+ Array colTranslationArray = *(amalInfo->getColTranslation()); + + Kokkos::View + rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size()); + Kokkos::View + colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size()); + + // get number of local nodes + LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); + typedef typename Kokkos::View id_translation_type; + id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size()); + id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size()); + Kokkos::deep_copy(rowTranslation, rowTranslationView); + Kokkos::deep_copy(colTranslation, colTranslationView); + + // extract striding information + blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map + LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) + if (A->IsView("stridedMaps") == true) { + const RCP myMap = A->getRowMap("stridedMaps"); + const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } + auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - // Detect and record dof rows that correspond to Dirichlet boundary conditions - boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + // + typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; + typedef typename kokkos_graph_type::row_map_type row_map_type; + // typedef typename row_map_type::HostMirror row_map_type_h; + typedef typename kokkos_graph_type::entries_type 
entries_type; - typename entries_type::non_const_type dofcols("dofcols", numDofCols/*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? should be a parallel reduce, i guess + // Stage 1c: get number of dof-nonzeros per blkSize node rows + typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); + LO numDofCols = 0; + CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0, numNodes), stage1aFunctor, numDofCols); + // parallel_scan (exclusive) + CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); + Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanFunctor); - // we have dofcols and dofids from Stage1dVectorFunctor - LO numNodeCols = 0; - typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); - typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); + // Detect and record dof rows that correspond to Dirichlet boundary conditions + boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1bcFunctor,numNodeCols); + typename entries_type::non_const_type dofcols("dofcols", numDofCols /*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? 
should be a parallel reduce, i guess - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor); + // we have dofcols and dofids from Stage1dVectorFunctor + LO numNodeCols = 0; + typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); + typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); - // create column node view - typename entries_type::non_const_type cols("nodecols", numNodeCols); + CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1bcFunctor, numNodeCols); + // parallel_scan (exclusive) + CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); + Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanNodeFunctor); - CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor); - kokkos_graph_type kokkosGraph(cols, rows); + // create column node view + typename entries_type::non_const_type cols("nodecols", numNodeCols); - // create LW graph - graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); + Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1dFunctor); + kokkos_graph_type kokkosGraph(cols, rows); - boundaryNodes = bndNodes; - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - numTotal = 
A->getLocalNumEntries(); + // create LW graph + graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - dofsPerNode = blkSize; + boundaryNodes = bndNodes; + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + numTotal = A->getLocalNumEntries(); - filteredA = A; + dofsPerNode = blkSize; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); - } + filteredA = A; + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); + } - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), KOKKOS_LAMBDA(const LO i, GO& n) { if (boundaryNodes(i)) n++; - }, numLocalBoundaryNodes); + }, + numLocalBoundaryNodes); - auto comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } + auto comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } - if ((GetVerbLevel() & Statistics1) && threshold != zero) { - auto comm = A->getRowMap()->getComm(); + if ((GetVerbLevel() & Statistics1) && threshold != zero) { + auto comm = A->getRowMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); + GO numGlobalTotal, 
numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); - if (numGlobalTotal != 0) { - GetOStream(Statistics1) << "Number of dropped entries: " - << numGlobalDropped << "/" << numGlobalTotal - << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)" << std::endl; - } + if (numGlobalTotal != 0) { + GetOStream(Statistics1) << "Number of dropped entries: " + << numGlobalDropped << "/" << numGlobalTotal + << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)" << std::endl; } - - Set(currentLevel, "DofsPerNode", dofsPerNode); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "A", filteredA); } + + Set(currentLevel, "DofsPerNode", dofsPerNode); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "A", filteredA); } -#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP +} // namespace MueLu +#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp index 7f94599c3ba1..a7fc2e3aad98 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp @@ -46,35 +46,32 @@ #ifndef MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP #define MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP - #include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" #include "MueLu_PreDropFunctionBaseClass_fwd.hpp" namespace MueLu { - /*! - * Base class you can derive from to allow user defined dropping - * - */ - template - class PreDropFunctionBaseClass : public BaseClass { +/*! 
+ * Base class you can derive from to allow user defined dropping + * + */ +template +class PreDropFunctionBaseClass : public BaseClass { #undef MUELU_PREDROPFUNCTIONBASECLASS_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Destructor - virtual ~PreDropFunctionBaseClass() { } - - //! Drop - virtual bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView & indices, const Teuchos::ArrayView & vals) = 0; + public: + //! Destructor + virtual ~PreDropFunctionBaseClass() {} - }; -} + //! Drop + virtual bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals) = 0; +}; +} // namespace MueLu #define MUELU_PREDROPFUNCTIONBASECLASS_SHORT -#endif // MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP +#endif // MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp index a42b58f56b18..599ca918572b 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp @@ -50,64 +50,59 @@ #include "MueLu_PreDropFunctionBaseClass.hpp" #include "MueLu_PreDropFunctionConstVal_fwd.hpp" - namespace MueLu { - /*! - * Example implementation for dropping values smaller then a constant threshold - * - */ - template - class PreDropFunctionConstVal : - public MueLu::PreDropFunctionBaseClass { +/*! + * Example implementation for dropping values smaller then a constant threshold + * + */ +template +class PreDropFunctionConstVal : public MueLu::PreDropFunctionBaseClass { #undef MUELU_PREDROPFUNCTIONCONSTVAL_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Constructor - explicit PreDropFunctionConstVal(const Scalar threshold = 0.0); - - //! 
Destructor - virtual ~PreDropFunctionConstVal() { } - - /*! Drop - * @param lrow (size_t): local row index (=lrowid) - * @param grow (GlobalOrdinal: global row id - * @param k (size_t): local column iterator - * @param lcid (LocalOrdinal): local column id (=indices[k]) - * @param gcid (GlobalOrdinal): global column id - * @param indices (ArrrayView): array of local column ids in current row (lrow) - * @param vals (ArrayView): array of corresponding values in current row (lrow) - * @return bool: false, if value in (lrow, lcid) shall be kept, true if it should be dropped - */ - bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView & indices, const Teuchos::ArrayView & vals); - - //! Return threshold value. - Scalar GetThreshold() const; - - //! @name Overridden from Teuchos::Describable - //@{ + public: + //! Constructor + explicit PreDropFunctionConstVal(const Scalar threshold = 0.0); + + //! Destructor + virtual ~PreDropFunctionConstVal() {} + + /*! Drop + * @param lrow (size_t): local row index (=lrowid) + * @param grow (GlobalOrdinal: global row id + * @param k (size_t): local column iterator + * @param lcid (LocalOrdinal): local column id (=indices[k]) + * @param gcid (GlobalOrdinal): global column id + * @param indices (ArrrayView): array of local column ids in current row (lrow) + * @param vals (ArrayView): array of corresponding values in current row (lrow) + * @return bool: false, if value in (lrow, lcid) shall be kept, true if it should be dropped + */ + bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals); - //! Return a simple one-line description of this object. - std::string description() const; + //! Return threshold value. + Scalar GetThreshold() const; - //! Print the object with some verbosity level to an FancyOStream object. 
- //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + //! @name Overridden from Teuchos::Describable + //@{ - //@} + //! Return a simple one-line description of this object. + std::string description() const; - private: + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - Scalar threshold_; + //@} - }; + private: + Scalar threshold_; +}; -} +} // namespace MueLu #define MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DECL_HPP +#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp index 4c1577d90146..75b35c447c45 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp @@ -53,40 +53,39 @@ namespace MueLu { - template - PreDropFunctionConstVal::PreDropFunctionConstVal(const Scalar threshold) - : threshold_(threshold) { } +template +PreDropFunctionConstVal::PreDropFunctionConstVal(const Scalar threshold) + : threshold_(threshold) {} - template - bool PreDropFunctionConstVal::Drop(size_t /* lrow */, GlobalOrdinal grow, size_t k, LocalOrdinal /* lcid */, GlobalOrdinal gcid, const Teuchos::ArrayView & /* indices */, const Teuchos::ArrayView & vals) { - if(Teuchos::ScalarTraits::magnitude(vals[k]) > Teuchos::ScalarTraits::magnitude(threshold_) || grow == gcid ) { - return false; // keep values - } - return true; // values too small -> drop them +template +bool PreDropFunctionConstVal::Drop(size_t /* lrow */, GlobalOrdinal grow, size_t k, LocalOrdinal /* lcid 
*/, GlobalOrdinal gcid, const Teuchos::ArrayView& /* indices */, const Teuchos::ArrayView& vals) { + if (Teuchos::ScalarTraits::magnitude(vals[k]) > Teuchos::ScalarTraits::magnitude(threshold_) || grow == gcid) { + return false; // keep values } + return true; // values too small -> drop them +} - template - Scalar PreDropFunctionConstVal::GetThreshold() const { - return threshold_; - } +template +Scalar PreDropFunctionConstVal::GetThreshold() const { + return threshold_; +} +template +std::string PreDropFunctionConstVal::description() const { + std::ostringstream out; + out << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; + return out.str(); +} - template - std::string PreDropFunctionConstVal::description() const { - std::ostringstream out; - out << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; - return out.str(); +/*template +void PreDropFunctionConstVal::describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + MUELU_DESCRIBE; + if (verbLevel & Parameters0) { + out0 << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; } +}*/ - /*template - void PreDropFunctionConstVal::describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - MUELU_DESCRIBE; - if (verbLevel & Parameters0) { - out0 << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; - } - }*/ - -} +} // namespace MueLu #define MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DEF_HPP +#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp index b763c93cd955..062dd78fa71b 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp @@ -65,103 +65,100 @@ namespace 
MueLu { - /*! - @class SmooVecCoalesceDropFactory - @brief Factory for creating a graph base on a given matrix. - - Factory for creating graphs from matrices with entries selectively dropped. - - ## Code paths ## - - Experimental dropping function based on taking a set of random vectors u, running - a smoother on A u = 0, and then basing the drop decisions on "how smooth" the vectors - are local. Neighobring regions where the vectors are smooth can be aggregated - together and so these are kept in the associated drop matrix. Areas that are - not smooth should end up in different aggregates and so the A_ij representing - these should be dropped. This Factory can address both PDE systems and - scalar PDEs, always creating a matrix reprsenting nodal connections as opposed - to dof connections. - - To enter this factor as opposed to the more standard CoalesceDropFactory() one - must set "aggregation: drop scheme" to "unsupported vector smoothing". In this - case some of the parameter options associated with CoalesceDropFactory (e.g., - "aggregation: drop tol", "aggregation: Dirichlet threshold", "lightweight wrap") - will cause parameter validator errors. 
- - ## Input/output of SmooVecCoalesceDropFactory ## - - ### User parameters of SmooVecCoalesceDropFactory ### - Parameter | type | default | master.xml | validated | requested | description - ---------------------------|-----------|-----------|:----------:|:---------:|:---------:|------------ - A |Factory | null | | * | * | Generating factory of the operator A - "aggregation: drop scheme"|std::string|"classical"| * | * | * | Must choose "unsupported vector smoothing" - "aggregation: number of times to pre or post smooth"|int| 10|* | | * | Amount of pre or post smoothing invocations - "aggregation: number of random vectors"|int| 10 | * | * | * | Number of random vectors - "aggregation: penalty parameters"|Array(double)|{12.0,-.20}| * | * | * | Ultimately determines how much dropping is done - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see SmooVecCoalesceDropFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see SmooVecCoalesceDropFactory::DeclareInput). - - ### Variables provided by UncoupledAggregationFactory ### - - After SmooVecCoalesceDropFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - Graph | SmooVecCoalesceDropFactory | Graph of matrix A - DofsPerNode | SmooVecCoalesceDropFactory | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. - - */ - - template - class SmooVecCoalesceDropFactory : public SingleLevelFactoryBase { +/*! + @class SmooVecCoalesceDropFactory + @brief Factory for creating a graph base on a given matrix. + + Factory for creating graphs from matrices with entries selectively dropped. + + ## Code paths ## + + Experimental dropping function based on taking a set of random vectors u, running + a smoother on A u = 0, and then basing the drop decisions on "how smooth" the vectors + are local. Neighobring regions where the vectors are smooth can be aggregated + together and so these are kept in the associated drop matrix. Areas that are + not smooth should end up in different aggregates and so the A_ij representing + these should be dropped. This Factory can address both PDE systems and + scalar PDEs, always creating a matrix reprsenting nodal connections as opposed + to dof connections. + + To enter this factor as opposed to the more standard CoalesceDropFactory() one + must set "aggregation: drop scheme" to "unsupported vector smoothing". In this + case some of the parameter options associated with CoalesceDropFactory (e.g., + "aggregation: drop tol", "aggregation: Dirichlet threshold", "lightweight wrap") + will cause parameter validator errors. 
+ + ## Input/output of SmooVecCoalesceDropFactory ## + + ### User parameters of SmooVecCoalesceDropFactory ### + Parameter | type | default | master.xml | validated | requested | description + ---------------------------|-----------|-----------|:----------:|:---------:|:---------:|------------ + A |Factory | null | | * | * | Generating factory of the operator A + "aggregation: drop scheme"|std::string|"classical"| * | * | * | Must choose "unsupported vector smoothing" + "aggregation: number of times to pre or post smooth"|int| 10|* | | * | Amount of pre or post smoothing invocations + "aggregation: number of random vectors"|int| 10 | * | * | * | Number of random vectors + "aggregation: penalty parameters"|Array(double)|{12.0,-.20}| * | * | * | Ultimately determines how much dropping is done + + The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
+ The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see SmooVecCoalesceDropFactory::GetValidParameters).
+ The * in the @c requested column states that the data is requested as input with all dependencies (see SmooVecCoalesceDropFactory::DeclareInput). + ### Variables provided by UncoupledAggregationFactory ### + + After SmooVecCoalesceDropFactory::Build the following data is available (if requested) + + Parameter | generated by | description + ----------|--------------|------------ + Graph | SmooVecCoalesceDropFactory | Graph of matrix A + DofsPerNode | SmooVecCoalesceDropFactory | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. + +*/ + +template +class SmooVecCoalesceDropFactory : public SingleLevelFactoryBase { #undef MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + SmooVecCoalesceDropFactory(); - //! Constructor - SmooVecCoalesceDropFactory(); + //! Destructor + virtual ~SmooVecCoalesceDropFactory() {} - //! Destructor - virtual ~SmooVecCoalesceDropFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level &currentLevel) const; + /// set predrop function + void SetPreDropFunction(const RCP >& predrop) { predrop_ = predrop; } - /// set predrop function - void SetPreDropFunction(const RCP > &predrop) { predrop_ = predrop; } + //@} - //@} + void Build(Level& currentLevel) const; // Build - void Build(Level &currentLevel) const; // Build + private: + // pre-drop function + mutable RCP predrop_; - private: + //! 
Methods to support compatible-relaxation style dropping + void badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP& dropParams, LO nPDEs, const MultiVector& smoothedTVecs, const MultiVector& smoothedNull, RCP& filteredGraph) const; + void badGuysDropfunc(LO row, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals, const MultiVector& smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP& penalties, const MultiVector& smoothedNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO& Nbcols, LO nLoc) const; - // pre-drop function - mutable - RCP predrop_; +}; // class SmooVecCoalesceDropFactory - //! Methods to support compatible-relaxation style dropping - void badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP & dropParams, LO nPDEs, const MultiVector& smoothedTVecs, const MultiVector& smoothedNull, RCP& filteredGraph) const; - void badGuysDropfunc(LO row, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals, const MultiVector& smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP & penalties, const MultiVector& smoothedNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO &Nbcols, LO nLoc) const; - - }; //class SmooVecCoalesceDropFactory - -} //namespace MueLu +} // namespace MueLu #define MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT -#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP +#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp index 24a9a8334275..d6ad50c24f99 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp @@ -69,7 +69,6 @@ #include "MueLu_Monitor.hpp" #include "MueLu_PreDropFunctionBaseClass.hpp" - #include #include @@ -84,160 +83,154 @@ #include #include - -#define poly0thOrderCoef 0 -#define 
poly1stOrderCoef 1 -#define poly2ndOrderCoef 2 -#define poly3rdOrderCoef 3 -#define poly4thOrderCoef 4 +#define poly0thOrderCoef 0 +#define poly1stOrderCoef 1 +#define poly2ndOrderCoef 2 +#define poly3rdOrderCoef 3 +#define poly4thOrderCoef 4 namespace MueLu { - template - RCP SmooVecCoalesceDropFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP SmooVecCoalesceDropFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop scheme"); - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - validParamList->getEntry("aggregation: drop scheme").setValidator( - rcp(new validatorType(Teuchos::tuple("unsupported vector smoothing"), "aggregation: drop scheme"))); - } - SET_VALID_ENTRY("aggregation: number of random vectors"); - SET_VALID_ENTRY("aggregation: number of times to pre or post smooth"); - SET_VALID_ENTRY("aggregation: penalty parameters"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother"); - validParamList->set< RCP >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother"); - - return validParamList; + SET_VALID_ENTRY("aggregation: drop scheme"); + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("unsupported vector smoothing"), "aggregation: drop scheme"))); } - - template - SmooVecCoalesceDropFactory::SmooVecCoalesceDropFactory() : predrop_(Teuchos::null) { } - - template - void SmooVecCoalesceDropFactory::DeclareInput(Level &currentLevel) const { - Input(currentLevel, "A"); - if (currentLevel.IsAvailable("PreSmoother")) { // rst: 
totally unsure that this is legal - Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available - } // so this always comes out false. - else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list? - Input(currentLevel, "PostSmoother"); - } + SET_VALID_ENTRY("aggregation: number of random vectors"); + SET_VALID_ENTRY("aggregation: number of times to pre or post smooth"); + SET_VALID_ENTRY("aggregation: penalty parameters"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother"); + validParamList->set >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother"); + + return validParamList; +} + +template +SmooVecCoalesceDropFactory::SmooVecCoalesceDropFactory() + : predrop_(Teuchos::null) {} + +template +void SmooVecCoalesceDropFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + if (currentLevel.IsAvailable("PreSmoother")) { // rst: totally unsure that this is legal + Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available + } // so this always comes out false. + else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list? 
+ Input(currentLevel, "PostSmoother"); } +} - template - void SmooVecCoalesceDropFactory::Build(Level &currentLevel) const { - - FactoryMonitor m(*this, "Build", currentLevel); +template +void SmooVecCoalesceDropFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - typedef Teuchos::ScalarTraits STS; + typedef Teuchos::ScalarTraits STS; - if (predrop_ != Teuchos::null) - GetOStream(Parameters0) << predrop_->description(); + if (predrop_ != Teuchos::null) + GetOStream(Parameters0) << predrop_->description(); - RCP A = Get< RCP >(currentLevel, "A"); + RCP A = Get >(currentLevel, "A"); - const ParameterList & pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - LO nPDEs = A->GetFixedBlockSize(); + LO nPDEs = A->GetFixedBlockSize(); - RCP< MultiVector > testVecs; - RCP< MultiVector > nearNull; + RCP testVecs; + RCP nearNull; #ifdef takeOut - testVecs = Xpetra::IO::ReadMultiVector("TpetraTVecs.mm", A->getRowMap()); + testVecs = Xpetra::IO::ReadMultiVector("TpetraTVecs.mm", A->getRowMap()); #endif - size_t numRandom= as(pL.get("aggregation: number of random vectors")); - testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true); - // use random test vectors but should be positive in order to not get - // crummy results ... so take abs() of randomize(). 
- testVecs->randomize(); - for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< Scalar > curVec = testVecs->getDataNonConst(kk); - for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii++ ) curVec[ii] = Teuchos::ScalarTraits::magnitude(curVec[ii]); - } - nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); - - // initialize null space to constants - for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< Scalar > curVec = nearNull->getDataNonConst(kk); - for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii += nearNull->getNumVectors() ) curVec[ii] = Teuchos::ScalarTraits::one(); - } - - RCP< MultiVector > zeroVec_TVecs; - RCP< MultiVector > zeroVec_Null; - - zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true); - zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); - zeroVec_TVecs->putScalar(Teuchos::ScalarTraits::zero()); - zeroVec_Null->putScalar( Teuchos::ScalarTraits::zero()); - - size_t nInvokeSmoother=as(pL.get("aggregation: number of times to pre or post smooth")); - if (currentLevel.IsAvailable("PreSmoother")) { - RCP preSmoo = currentLevel.Get< RCP >("PreSmoother"); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs,*zeroVec_TVecs,false); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull,*zeroVec_Null,false); - } - else if (currentLevel.IsAvailable("PostSmoother")) { - RCP postSmoo = currentLevel.Get< RCP >("PostSmoother"); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs,*zeroVec_TVecs,false); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null,false); - } - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother"); - - Teuchos::ArrayRCP penaltyPolyCoef(5); - Teuchos::ArrayView inputPolyCoef; - - penaltyPolyCoef[poly0thOrderCoef] = 12.; - 
penaltyPolyCoef[poly1stOrderCoef] = -.2; - penaltyPolyCoef[poly2ndOrderCoef] = 0.0; - penaltyPolyCoef[poly3rdOrderCoef] = 0.0; - penaltyPolyCoef[poly4thOrderCoef] = 0.0; - - if(pL.isParameter("aggregation: penalty parameters") && pL.get >("aggregation: penalty parameters").size() > 0) { - if (pL.get >("aggregation: penalty parameters").size() > penaltyPolyCoef.size()) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less"); - inputPolyCoef = pL.get >("aggregation: penalty parameters")(); - - for (size_t i = 0; i < as(inputPolyCoef.size()) ; i++) penaltyPolyCoef[i] = as(inputPolyCoef[i]); - for (size_t i = as(inputPolyCoef.size()); i < as(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits::zero(); - } - - - RCP filteredGraph; - badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph); + size_t numRandom = as(pL.get("aggregation: number of random vectors")); + testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true); + // use random test vectors but should be positive in order to not get + // crummy results ... so take abs() of randomize(). 
+ testVecs->randomize(); + for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = testVecs->getDataNonConst(kk); + for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii++) curVec[ii] = Teuchos::ScalarTraits::magnitude(curVec[ii]); + } + nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); + + // initialize null space to constants + for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = nearNull->getDataNonConst(kk); + for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii += nearNull->getNumVectors()) curVec[ii] = Teuchos::ScalarTraits::one(); + } + + RCP zeroVec_TVecs; + RCP zeroVec_Null; + + zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true); + zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); + zeroVec_TVecs->putScalar(Teuchos::ScalarTraits::zero()); + zeroVec_Null->putScalar(Teuchos::ScalarTraits::zero()); + + size_t nInvokeSmoother = as(pL.get("aggregation: number of times to pre or post smooth")); + if (currentLevel.IsAvailable("PreSmoother")) { + RCP preSmoo = currentLevel.Get >("PreSmoother"); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs, *zeroVec_TVecs, false); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull, *zeroVec_Null, false); + } else if (currentLevel.IsAvailable("PostSmoother")) { + RCP postSmoo = currentLevel.Get >("PostSmoother"); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs, *zeroVec_TVecs, false); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null, false); + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother"); + + Teuchos::ArrayRCP penaltyPolyCoef(5); + Teuchos::ArrayView inputPolyCoef; + + penaltyPolyCoef[poly0thOrderCoef] = 12.; + penaltyPolyCoef[poly1stOrderCoef] = -.2; + 
penaltyPolyCoef[poly2ndOrderCoef] = 0.0; + penaltyPolyCoef[poly3rdOrderCoef] = 0.0; + penaltyPolyCoef[poly4thOrderCoef] = 0.0; + + if (pL.isParameter("aggregation: penalty parameters") && pL.get >("aggregation: penalty parameters").size() > 0) { + if (pL.get >("aggregation: penalty parameters").size() > penaltyPolyCoef.size()) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less"); + inputPolyCoef = pL.get >("aggregation: penalty parameters")(); + + for (size_t i = 0; i < as(inputPolyCoef.size()); i++) penaltyPolyCoef[i] = as(inputPolyCoef[i]); + for (size_t i = as(inputPolyCoef.size()); i < as(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits::zero(); + } + RCP filteredGraph; + badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph); #ifdef takeOut - /* write out graph for serial debugging purposes only. */ - - FILE* fp = fopen("codeOutput","w"); - fprintf(fp,"%d %d %d\n",(int) filteredGraph->GetNodeNumVertices(),(int) filteredGraph->GetNodeNumVertices(), - (int) filteredGraph->GetNodeNumEdges()); - for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) { - ArrayView inds = filteredGraph->getNeighborVertices(as(i)); - for (size_t j = 0; j < as(inds.size()); j++) { - fprintf(fp,"%d %d 1.00e+00\n",(int) i+1,(int) inds[j]+1); - } - } - fclose(fp); + /* write out graph for serial debugging purposes only. 
*/ + + FILE* fp = fopen("codeOutput", "w"); + fprintf(fp, "%d %d %d\n", (int)filteredGraph->GetNodeNumVertices(), (int)filteredGraph->GetNodeNumVertices(), + (int)filteredGraph->GetNodeNumEdges()); + for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) { + ArrayView inds = filteredGraph->getNeighborVertices(as(i)); + for (size_t j = 0; j < as(inds.size()); j++) { + fprintf(fp, "%d %d 1.00e+00\n", (int)i + 1, (int)inds[j] + 1); + } + } + fclose(fp); #endif - SC threshold = .01; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - Set(currentLevel, "Graph", filteredGraph); - Set(currentLevel, "DofsPerNode", 1); + SC threshold = .01; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + Set(currentLevel, "Graph", filteredGraph); + Set(currentLevel, "DofsPerNode", 1); - } //Build +} // Build - template - void SmooVecCoalesceDropFactory::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP & penaltyPolyCoef , LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP& filteredGraph) const { +template +void SmooVecCoalesceDropFactory::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP& penaltyPolyCoef, LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP& filteredGraph) const { /* * Compute coalesce/drop graph (in filteredGraph) for A. The basic idea is to * balance trade-offs associated with @@ -274,28 +267,28 @@ namespace MueLu { * Note: testVecs is supplied by the user, but normally is the result of * applying a relaxation scheme to Au = 0 where u is initial random. 
*/ - - GO numMyNnz = Teuchos::as(Amat.getLocalNumEntries()); - size_t nLoc = Amat.getRowMap()->getLocalNumElements(); - size_t nBlks = nLoc/nPDEs; - if (nBlks*nPDEs != nLoc ) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize"); + GO numMyNnz = Teuchos::as(Amat.getLocalNumEntries()); + size_t nLoc = Amat.getRowMap()->getLocalNumElements(); - Teuchos::ArrayRCP newRowPtr(nBlks+1); /* coalesce & drop matrix */ - Teuchos::ArrayRCP newCols(numMyNnz); /* arrays */ + size_t nBlks = nLoc / nPDEs; + if (nBlks * nPDEs != nLoc) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize"); - Teuchos::ArrayRCP bcols(nBlks); /* returned by dropfun(j,...) */ - Teuchos::ArrayRCP keepOrNot(nBlks); /* gives cols for jth row and */ - /* whether or not entry is */ - /* kept or dropped. */ + Teuchos::ArrayRCP newRowPtr(nBlks + 1); /* coalesce & drop matrix */ + Teuchos::ArrayRCP newCols(numMyNnz); /* arrays */ - LO maxNzPerRow = 200; + Teuchos::ArrayRCP bcols(nBlks); /* returned by dropfun(j,...) */ + Teuchos::ArrayRCP keepOrNot(nBlks); /* gives cols for jth row and */ + /* whether or not entry is */ + /* kept or dropped. */ + + LO maxNzPerRow = 200; Teuchos::ArrayRCP penalties(maxNzPerRow); /* Penalty function */ /* described above. */ - - Teuchos::ArrayRCP keepStatus(nBlks,true); /* accumulated keepOrNot info */ - Teuchos::ArrayRCP bColList(nBlks); /* accumulated bcols info */ + + Teuchos::ArrayRCP keepStatus(nBlks, true); /* accumulated keepOrNot info */ + Teuchos::ArrayRCP bColList(nBlks); /* accumulated bcols info */ /* for an entire block as */ /* opposed to a single row */ /* Additionally, keepOrNot[j] */ @@ -305,125 +298,123 @@ namespace MueLu { /* whether the jth block is */ /* kept within the block row. 
*/ - Teuchos::ArrayRCP alreadyOnBColList(nBlks,false); /* used to avoid recording the*/ - /* same block column when */ - /* processing different pt */ - /* rows within a block. */ - - Teuchos::ArrayRCP boundaryNodes(nBlks,false); + Teuchos::ArrayRCP alreadyOnBColList(nBlks, false); /* used to avoid recording the*/ + /* same block column when */ + /* processing different pt */ + /* rows within a block. */ + Teuchos::ArrayRCP boundaryNodes(nBlks, false); - for (LO i = 0; i < maxNzPerRow; i++) + for (LO i = 0; i < maxNzPerRow; i++) penalties[i] = penaltyPolyCoef[poly0thOrderCoef] + - penaltyPolyCoef[poly1stOrderCoef]*(as(i)) + - penaltyPolyCoef[poly2ndOrderCoef]*(as(i*i)) + - (penaltyPolyCoef[poly3rdOrderCoef]*(as(i*i))*(as(i))) + //perhaps avoids overflow? - (penaltyPolyCoef[poly4thOrderCoef]*(as(i*i))*(as(i*i))); - - LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol; - newRowPtr[0] = 0; - - /* proceed block by block */ - for (LO i = 0; i < as(nBlks); i++) { - newRowPtr[i+1] = newRowPtr[i]; - for (LO j = 0; j < nPDEs; j++) { - row = row + 1; - - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - - Amat.getLocalRowView(row, indices, vals); - - if (indices.size() > maxNzPerRow) { - LO oldSize = maxNzPerRow; - maxNzPerRow = indices.size() + 100; - penalties.resize(as(maxNzPerRow),0.0); - for (LO k = oldSize; k < maxNzPerRow; k++) - penalties[k] = penaltyPolyCoef[poly0thOrderCoef] + - penaltyPolyCoef[poly1stOrderCoef]*(as(i)) + - penaltyPolyCoef[poly2ndOrderCoef]*(as(i*i)) + - (penaltyPolyCoef[poly3rdOrderCoef]*(as(i*i))*(as(i))) + - (penaltyPolyCoef[poly4thOrderCoef]*(as(i*i))*(as(i*i))); - } - badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols,keepOrNot,Nbcols,nLoc); - for (LO k=0; k < Nbcols; k++) { - bcol = bcols[k]; - - /* add to bColList if not already on it */ - - if (alreadyOnBColList[bcol] == false) {/* for PDE systems only record */ - bColList[numBCols++] = bcol; /* neighboring block one time */ - alreadyOnBColList[bcol] = 
true; - } - /* drop if any pt row within block indicates entry should be dropped */ - - if (keepOrNot[k] == false) keepStatus[bcol] = false; - - } /* for (k=0; k < Nbcols; k++) */ - } /* for (j = 0; i < nPDEs; j++) */ - - /* finished with block row. Now record block entries that we keep */ - /* and reset keepStatus, bColList, and alreadyOnBColList. */ - - if ( numBCols < 2) boundaryNodes[i] = true; - for (LO j=0; j < numBCols; j++) { - bcol = bColList[j]; - if (keepStatus[bcol] == true) { - newCols[nzTotal] = bColList[j]; - newRowPtr[i+1]++; - nzTotal = nzTotal + 1; - } - keepStatus[bcol] = true; - alreadyOnBColList[bcol] = false; - bColList[j] = 0; - } - numBCols = 0; - } /* for (i = 0; i < nBlks; i++) */ - - /* create array of the correct size and copy over newCols to it */ - - Teuchos::ArrayRCP finalCols(nzTotal); - for (LO i = 0; i < nzTotal; i++) finalCols[i] = newCols[i]; - - // Not using column map because we do not allow for any off-proc stuff. - // Not sure if this is okay. FIXME - - RCP rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap(); - - LO nAmalgNodesOnProc = rowMap->getLocalNumElements()/nPDEs; - Teuchos::Array nodalGIDs(nAmalgNodesOnProc); - typename Teuchos::ScalarTraits::coordinateType temp; - for (size_t i = 0; i < as(nAmalgNodesOnProc); i++ ) { - GO gid = rowMap->getGlobalElement(i*nPDEs); - temp = ((typename Teuchos::ScalarTraits::coordinateType) (gid))/((typename Teuchos::ScalarTraits::coordinateType) (nPDEs)); - nodalGIDs[i] = as(floor(temp)); - } - GO nAmalgNodesGlobal = rowMap->getGlobalNumElements(); - GO nBlkGlobal = nAmalgNodesGlobal/nPDEs; - if (nBlkGlobal*nPDEs != nAmalgNodesGlobal) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize"); - - Teuchos::RCP AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal, - nodalGIDs(),0,rowMap->getComm()); - - filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A")); - 
filteredGraph->SetBoundaryNodeMap(boundaryNodes); + penaltyPolyCoef[poly1stOrderCoef] * (as(i)) + + penaltyPolyCoef[poly2ndOrderCoef] * (as(i * i)) + + (penaltyPolyCoef[poly3rdOrderCoef] * (as(i * i)) * (as(i))) + // perhaps avoids overflow? + (penaltyPolyCoef[poly4thOrderCoef] * (as(i * i)) * (as(i * i))); + + LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol; + newRowPtr[0] = 0; + + /* proceed block by block */ + for (LO i = 0; i < as(nBlks); i++) { + newRowPtr[i + 1] = newRowPtr[i]; + for (LO j = 0; j < nPDEs; j++) { + row = row + 1; + + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + + Amat.getLocalRowView(row, indices, vals); + + if (indices.size() > maxNzPerRow) { + LO oldSize = maxNzPerRow; + maxNzPerRow = indices.size() + 100; + penalties.resize(as(maxNzPerRow), 0.0); + for (LO k = oldSize; k < maxNzPerRow; k++) + penalties[k] = penaltyPolyCoef[poly0thOrderCoef] + + penaltyPolyCoef[poly1stOrderCoef] * (as(i)) + + penaltyPolyCoef[poly2ndOrderCoef] * (as(i * i)) + + (penaltyPolyCoef[poly3rdOrderCoef] * (as(i * i)) * (as(i))) + + (penaltyPolyCoef[poly4thOrderCoef] * (as(i * i)) * (as(i * i))); + } + badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols, keepOrNot, Nbcols, nLoc); + for (LO k = 0; k < Nbcols; k++) { + bcol = bcols[k]; + + /* add to bColList if not already on it */ + if (alreadyOnBColList[bcol] == false) { /* for PDE systems only record */ + bColList[numBCols++] = bcol; /* neighboring block one time */ + alreadyOnBColList[bcol] = true; + } + /* drop if any pt row within block indicates entry should be dropped */ + + if (keepOrNot[k] == false) keepStatus[bcol] = false; + + } /* for (k=0; k < Nbcols; k++) */ + } /* for (j = 0; i < nPDEs; j++) */ + + /* finished with block row. Now record block entries that we keep */ + /* and reset keepStatus, bColList, and alreadyOnBColList. 
*/ + + if (numBCols < 2) boundaryNodes[i] = true; + for (LO j = 0; j < numBCols; j++) { + bcol = bColList[j]; + if (keepStatus[bcol] == true) { + newCols[nzTotal] = bColList[j]; + newRowPtr[i + 1]++; + nzTotal = nzTotal + 1; + } + keepStatus[bcol] = true; + alreadyOnBColList[bcol] = false; + bColList[j] = 0; + } + numBCols = 0; + } /* for (i = 0; i < nBlks; i++) */ + + /* create array of the correct size and copy over newCols to it */ + + Teuchos::ArrayRCP finalCols(nzTotal); + for (LO i = 0; i < nzTotal; i++) finalCols[i] = newCols[i]; + + // Not using column map because we do not allow for any off-proc stuff. + // Not sure if this is okay. FIXME + + RCP rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap(); + + LO nAmalgNodesOnProc = rowMap->getLocalNumElements() / nPDEs; + Teuchos::Array nodalGIDs(nAmalgNodesOnProc); + typename Teuchos::ScalarTraits::coordinateType temp; + for (size_t i = 0; i < as(nAmalgNodesOnProc); i++) { + GO gid = rowMap->getGlobalElement(i * nPDEs); + temp = ((typename Teuchos::ScalarTraits::coordinateType)(gid)) / ((typename Teuchos::ScalarTraits::coordinateType)(nPDEs)); + nodalGIDs[i] = as(floor(temp)); } + GO nAmalgNodesGlobal = rowMap->getGlobalNumElements(); + GO nBlkGlobal = nAmalgNodesGlobal / nPDEs; + if (nBlkGlobal * nPDEs != nAmalgNodesGlobal) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize"); + + Teuchos::RCP AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal, + nodalGIDs(), 0, rowMap->getComm()); - template - void SmooVecCoalesceDropFactory::badGuysDropfunc(LO row, const Teuchos::ArrayView& cols, const Teuchos::ArrayView& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP & penalties, const MultiVector& nearNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO &Nbcols, LO nLoc) const { - using TST=Teuchos::ScalarTraits; + filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A")); + 
filteredGraph->SetBoundaryNodeMap(boundaryNodes); +} - LO nLeng = cols.size(); +template +void SmooVecCoalesceDropFactory::badGuysDropfunc(LO row, const Teuchos::ArrayView& cols, const Teuchos::ArrayView& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP& penalties, const MultiVector& nearNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO& Nbcols, LO nLoc) const { + using TST = Teuchos::ScalarTraits; + + LO nLeng = cols.size(); typename TST::coordinateType temp; - temp = ((typename TST::coordinateType) (row))/((typename TST::coordinateType) (nPDEs)); + temp = ((typename TST::coordinateType)(row)) / ((typename TST::coordinateType)(nPDEs)); LO blkRow = as(floor(temp)); - Teuchos::ArrayRCP badGuy( nLeng, 0.0); - Teuchos::ArrayRCP subNull(nLeng, 0.0); /* subset of nearNull */ - /* associated with current */ - /* dof within node. */ - + Teuchos::ArrayRCP badGuy(nLeng, 0.0); + Teuchos::ArrayRCP subNull(nLeng, 0.0); /* subset of nearNull */ + /* associated with current */ + /* dof within node. */ + /* Only consider testVecs associated with same dof & on processor. Further */ /* collapse testVecs to a single badGuy vector by basically taking the worst */ /* (least smooth) values for each of the off diags. In particular, we look at*/ @@ -433,39 +424,37 @@ namespace MueLu { /* two guys are aggregated. So, the biggest ratio mismatch is used to choose */ /* the testVec entry associated with each off-diagonal entry. 
*/ - for (LO i = 0; i < nLeng; i++) keepOrNot[i] = false; - LO diagInd = -1; - Nbcols = 0; - LO rowDof = row - blkRow*nPDEs; - Teuchos::ArrayRCP< const Scalar > oneNull = nearNull.getData( as(rowDof)); + LO diagInd = -1; + Nbcols = 0; + LO rowDof = row - blkRow * nPDEs; + Teuchos::ArrayRCP oneNull = nearNull.getData(as(rowDof)); for (LO i = 0; i < nLeng; i++) { - if ((cols[i] < nLoc ) && (TST::magnitude(vals[i]) != 0.0)) { /* on processor */ - temp = ((typename TST::coordinateType) (cols[i]))/((typename TST::coordinateType) (nPDEs)); - LO colDof = cols[i] - (as(floor( temp )))*nPDEs; + if ((cols[i] < nLoc) && (TST::magnitude(vals[i]) != 0.0)) { /* on processor */ + temp = ((typename TST::coordinateType)(cols[i])) / ((typename TST::coordinateType)(nPDEs)); + LO colDof = cols[i] - (as(floor(temp))) * nPDEs; if (colDof == rowDof) { /* same dof within node as row */ - Bcols[ Nbcols] = (cols[i] - colDof)/nPDEs; + Bcols[Nbcols] = (cols[i] - colDof) / nPDEs; subNull[Nbcols] = oneNull[cols[i]]; if (cols[i] != row) { /* not diagonal */ - Scalar worstRatio = -TST::one(); - Scalar targetRatio = subNull[Nbcols]/oneNull[row]; + Scalar worstRatio = -TST::one(); + Scalar targetRatio = subNull[Nbcols] / oneNull[row]; Scalar actualRatio; - for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< const Scalar > curVec = testVecs.getData(kk); - actualRatio = curVec[cols[i]]/curVec[row]; + for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = testVecs.getData(kk); + actualRatio = curVec[cols[i]] / curVec[row]; if (TST::magnitude(actualRatio - targetRatio) > TST::magnitude(worstRatio)) { - badGuy[Nbcols] = actualRatio; - worstRatio = Teuchos::ScalarTraits::magnitude(actualRatio - targetRatio); + badGuy[Nbcols] = actualRatio; + worstRatio = Teuchos::ScalarTraits::magnitude(actualRatio - targetRatio); } } - } - else { - badGuy[ Nbcols] = 1.; + } else { + badGuy[Nbcols] = 1.; keepOrNot[Nbcols] = true; - diagInd = Nbcols; + diagInd 
= Nbcols; } (Nbcols)++; } @@ -475,18 +464,18 @@ namespace MueLu { /* Make sure that diagonal entry is in block col list */ if (diagInd == -1) { - Bcols[ Nbcols] = (row - rowDof)/nPDEs; - subNull[ Nbcols] = 1.; - badGuy[ Nbcols] = 1.; + Bcols[Nbcols] = (row - rowDof) / nPDEs; + subNull[Nbcols] = 1.; + badGuy[Nbcols] = 1.; keepOrNot[Nbcols] = true; - diagInd = Nbcols; + diagInd = Nbcols; (Nbcols)++; } - Scalar currentRP = oneNull[row]*oneNull[row]; - Scalar currentRTimesBadGuy= oneNull[row]*badGuy[diagInd]; - Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */ - /* size 1 agg, so fit is perfect */ + Scalar currentRP = oneNull[row] * oneNull[row]; + Scalar currentRTimesBadGuy = oneNull[row] * badGuy[diagInd]; + Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */ + /* size 1 agg, so fit is perfect */ /* starting from a set that only includes the diagonal entry consider adding */ /* one off-diagonal at a time until the fitValue exceeds the penalty term. 
*/ @@ -496,7 +485,7 @@ namespace MueLu { /* includes the diagonal, all already determined neighbors, and the potential*/ /* new neighbor */ - LO nKeep = 1, flag = 1, minId; + LO nKeep = 1, flag = 1, minId; Scalar minFit, minFitRP = 0., minFitRTimesBadGuy = 0.; Scalar newRP, newRTimesBadGuy; @@ -507,45 +496,46 @@ namespace MueLu { minFit = 1000000.; minId = -1; - for (LO i=0; i < Nbcols; i++) { + for (LO i = 0; i < Nbcols; i++) { if (keepOrNot[i] == false) { - keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */ - newRP = currentRP + subNull[i]*subNull[i]; - newRTimesBadGuy= currentRTimesBadGuy + subNull[i]*badGuy[i]; - Scalar ratio = newRTimesBadGuy/newRP; + keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */ + newRP = currentRP + subNull[i] * subNull[i]; + newRTimesBadGuy = currentRTimesBadGuy + subNull[i] * badGuy[i]; + Scalar ratio = newRTimesBadGuy / newRP; Scalar newFit = 0.0; - for (LO k=0; k < Nbcols; k++) { + for (LO k = 0; k < Nbcols; k++) { if (keepOrNot[k] == true) { - Scalar diff = badGuy[k] - ratio*subNull[k]; - newFit = newFit + diff*diff; + Scalar diff = badGuy[k] - ratio * subNull[k]; + newFit = newFit + diff * diff; } } if (Teuchos::ScalarTraits::magnitude(newFit) < Teuchos::ScalarTraits::magnitude(minFit)) { - minId = i; - minFit = newFit; - minFitRP = newRP; - minFitRTimesBadGuy= newRTimesBadGuy; + minId = i; + minFit = newFit; + minFitRP = newRP; + minFitRTimesBadGuy = newRTimesBadGuy; } keepOrNot[i] = false; } } - if (minId == -1) flag = 0; + if (minId == -1) + flag = 0; else { - minFit = sqrt(minFit); + minFit = sqrt(minFit); Scalar newScore = penalties[nKeep] + minFit; if (Teuchos::ScalarTraits::magnitude(newScore) < Teuchos::ScalarTraits::magnitude(currentScore)) { - nKeep = nKeep + 1; - keepOrNot[minId]= true; - currentScore = newScore; - currentRP = minFitRP; - currentRTimesBadGuy= minFitRTimesBadGuy; - } - else flag = 0; + nKeep = nKeep + 1; + keepOrNot[minId] = true; + currentScore = newScore; + 
currentRP = minFitRP; + currentRTimesBadGuy = minFitRTimesBadGuy; + } else + flag = 0; } } - } +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP +#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp index 80b779e26a7d..a66e5570c962 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp @@ -54,85 +54,79 @@ namespace MueLu { - /*! - @class UnsmooshFactory class. - @brief Factory for building "unsmooshed" transfer operators from transfer operators associated with a scalar helper problem (built by the VariableDofLaplacianFactory) - - The output prolongation operator P is compatible to the input matrix A. The input prolongation matrix P is supposed to be built by the VariableDofLaplacianFactory with 1 DOF per node. - The DofStatus array declares for each row, whether it is a standard dof or a padded/pseudo Dirichlet dof. - The user has to provide the parameter "maxDofPerNode", since the coarse matrices are always padded by construction (i.e. constant number of DOFs per node). This parameter is needed for building the column map of the unsmooshed prolongation operator. - The parameter "fineIsPadded" allows to specify whether the input matrix A on the finest level is padded or not. In the padded case we have an artificially extended input matrix with pseudo Dirichlet rows on inactive rows. In the non-padded version - we have "variable" number of Dofs per node. 
- - @ingroup MueLuGraphClasses - - ## Input/output of UnsmooshFactory ## - - ### User parameters of UnsmooshFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the input matrix A with potentially variable number of DOFs. Might be padded or non-padded. Padded means, that the matrix has additional artificial rows and columns to have a constant number of DOFs per node. Needed to match the row map for the unsmooshed version of P with the row map of the unamalgamated input matrix A. - P | Factory | null | | * | * | Generating factory of the (amalgamated) prolongator P generated from a (pseudo Laplacian) with 1 Dofs per node. Will be unsmooshed to be compatible with input matrix A in this factory. - DofStatus |Facotry | null | | * | * | Generating factory for dofStatus array (usually generated by the VariableDofLaplacdianFactory). It is a Teuchos::Array of size number of Rows of input matrix P multiplied by the maximum possible number of Dofs per node. - maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node. Needed for generating unsmooshed P. Note, that the coarse matrices are always padded, i.e. we have constant number of DOFs per node. - fineIsPadded | bool | false | * | | True if finest level input matrix is padded (default is false) - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see UnsmooshFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see UnsmooshFactory::DeclareInput). - - ### Variables provided by UnsmooshFactory ### - - After UnsmooshFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - | P | UnsmooshFactory | Unsmooshed prolongation operator - */ - template - class UnsmooshFactory : public PFactory { +/*! + @class UnsmooshFactory class. + @brief Factory for building "unsmooshed" transfer operators from transfer operators associated with a scalar helper problem (built by the VariableDofLaplacianFactory) + + The output prolongation operator P is compatible to the input matrix A. The input prolongation matrix P is supposed to be built by the VariableDofLaplacianFactory with 1 DOF per node. + The DofStatus array declares for each row, whether it is a standard dof or a padded/pseudo Dirichlet dof. + The user has to provide the parameter "maxDofPerNode", since the coarse matrices are always padded by construction (i.e. constant number of DOFs per node). This parameter is needed for building the column map of the unsmooshed prolongation operator. + The parameter "fineIsPadded" allows to specify whether the input matrix A on the finest level is padded or not. In the padded case we have an artificially extended input matrix with pseudo Dirichlet rows on inactive rows. In the non-padded version + we have "variable" number of Dofs per node. + + @ingroup MueLuGraphClasses + + ## Input/output of UnsmooshFactory ## + + ### User parameters of UnsmooshFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the input matrix A with potentially variable number of DOFs. Might be padded or non-padded. 
Padded means, that the matrix has additional artificial rows and columns to have a constant number of DOFs per node. Needed to match the row map for the unsmooshed version of P with the row map of the unamalgamated input matrix A. + P | Factory | null | | * | * | Generating factory of the (amalgamated) prolongator P generated from a (pseudo Laplacian) with 1 Dofs per node. Will be unsmooshed to be compatible with input matrix A in this factory. + DofStatus |Facotry | null | | * | * | Generating factory for dofStatus array (usually generated by the VariableDofLaplacdianFactory). It is a Teuchos::Array of size number of Rows of input matrix P multiplied by the maximum possible number of Dofs per node. + maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node. Needed for generating unsmooshed P. Note, that the coarse matrices are always padded, i.e. we have constant number of DOFs per node. + fineIsPadded | bool | false | * | | True if finest level input matrix is padded (default is false) + + + The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
+ The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see UnsmooshFactory::GetValidParameters).
+ The * in the @c requested column states that the data is requested as input with all dependencies (see UnsmooshFactory::DeclareInput). + + ### Variables provided by UnsmooshFactory ### + + After UnsmooshFactory::Build the following data is available (if requested) + + Parameter | generated by | description + ----------|--------------|------------ + | P | UnsmooshFactory | Unsmooshed prolongation operator +*/ +template +class UnsmooshFactory : public PFactory { #undef MUELU_UNSMOOSHFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + UnsmooshFactory(); - //! Constructor - UnsmooshFactory(); + //! Destructor + virtual ~UnsmooshFactory() {} - //! Destructor - virtual ~UnsmooshFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - void DeclareInput(Level &fineLevel, Level &coarseLevel) const; + //@} - //@} + void Build(Level &fineLevel, Level &coarseLevel) const; // Build + void BuildP(Level & /* fineLevel */, Level & /* coarseLevel */) const {}; // TAW no real need for an extra BuildP routine. Just use Build - void Build (Level &fineLevel, Level &coarseLevel) const; // Build - void BuildP(Level &/* fineLevel */, Level &/* coarseLevel */) const {}; // TAW no real need for an extra BuildP routine. 
Just use Build + private: +}; // class UnsmooshFactory - private: - - - - - }; //class UnsmooshFactory - -} //namespace MueLu +} // namespace MueLu #define MUELU_UNSMOOSHFACTORY_SHORT - #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp index f49f488463ea..cc87c1c8988f 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp @@ -53,198 +53,193 @@ namespace MueLu { - template - UnsmooshFactory::UnsmooshFactory() { } - - template - RCP UnsmooshFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory for unamalgamated matrix. Row map of (unamalgamted) output prolongation operator should match row map of this A."); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the (amalgamated) prolongator P"); - validParamList->set< RCP >("DofStatus", Teuchos::null, "Generating factory for dofStatus array (usually the VariableDofLaplacdianFactory)"); - - validParamList->set< int > ("maxDofPerNode", 1, "Maximum number of DOFs per node"); - validParamList->set< bool > ("fineIsPadded" , false, "true if finest level input matrix is padded"); - - return validParamList; - } - - template - void UnsmooshFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - //const ParameterList& pL = GetParameterList(); - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // DofStatus only provided on the finest level (by user) - // On the coarser levels it is auto-generated using the DBC information from the unamalgamated matrix A - if(fineLevel.GetLevelID() == 0) - Input(fineLevel, "DofStatus"); - } - - template - void UnsmooshFactory::Build(Level &fineLevel, Level 
&coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - - // extract matrices (unamalgamated A and amalgamated P) - RCP unamalgA = Get< RCP >(fineLevel, "A"); - RCP amalgP = Get< RCP >(coarseLevel, "P"); - - // extract user parameters - int maxDofPerNode = pL.get ("maxDofPerNode"); - bool fineIsPadded = pL.get("fineIsPadded"); - - // get dofStatus information - // On the finest level it is provided by the user. On the coarser levels it is constructed - // using the DBC information of the matrix A - Teuchos::Array dofStatus; - if(fineLevel.GetLevelID() == 0) { - dofStatus = Get >(fineLevel, "DofStatus"); - } else { - // dof status is the dirichlet information of unsmooshed/unamalgamated A (fine level) - dofStatus = Teuchos::Array(unamalgA->getRowMap()->getLocalNumElements() /*amalgP->getRowMap()->getLocalNumElements() * maxDofPerNode*/,'s'); - - bool bHasZeroDiagonal = false; - Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*unamalgA,bHasZeroDiagonal,STS::magnitude(0.5)); - - TEUCHOS_TEST_FOR_EXCEPTION(dirOrNot.size() != dofStatus.size(), MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: inconsistent number of coarse DBC array and dofStatus array. dirOrNot.size() = " << dirOrNot.size() << " dofStatus.size() = " << dofStatus.size()); - for(decltype(dirOrNot.size()) i = 0; i < dirOrNot.size(); ++i) { - if(dirOrNot[i] == true) dofStatus[i] = 'p'; - } +template +UnsmooshFactory::UnsmooshFactory() {} + +template +RCP UnsmooshFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + validParamList->set >("A", Teuchos::null, "Generating factory for unamalgamated matrix. 
Row map of (unamalgamted) output prolongation operator should match row map of this A."); + validParamList->set >("P", Teuchos::null, "Generating factory of the (amalgamated) prolongator P"); + validParamList->set >("DofStatus", Teuchos::null, "Generating factory for dofStatus array (usually the VariableDofLaplacdianFactory)"); + + validParamList->set("maxDofPerNode", 1, "Maximum number of DOFs per node"); + validParamList->set("fineIsPadded", false, "true if finest level input matrix is padded"); + + return validParamList; +} + +template +void UnsmooshFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + // const ParameterList& pL = GetParameterList(); + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // DofStatus only provided on the finest level (by user) + // On the coarser levels it is auto-generated using the DBC information from the unamalgamated matrix A + if (fineLevel.GetLevelID() == 0) + Input(fineLevel, "DofStatus"); +} + +template +void UnsmooshFactory::Build(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + typedef Teuchos::ScalarTraits STS; + + const ParameterList &pL = GetParameterList(); + + // extract matrices (unamalgamated A and amalgamated P) + RCP unamalgA = Get >(fineLevel, "A"); + RCP amalgP = Get >(coarseLevel, "P"); + + // extract user parameters + int maxDofPerNode = pL.get("maxDofPerNode"); + bool fineIsPadded = pL.get("fineIsPadded"); + + // get dofStatus information + // On the finest level it is provided by the user. 
On the coarser levels it is constructed + // using the DBC information of the matrix A + Teuchos::Array dofStatus; + if (fineLevel.GetLevelID() == 0) { + dofStatus = Get >(fineLevel, "DofStatus"); + } else { + // dof status is the dirichlet information of unsmooshed/unamalgamated A (fine level) + dofStatus = Teuchos::Array(unamalgA->getRowMap()->getLocalNumElements() /*amalgP->getRowMap()->getLocalNumElements() * maxDofPerNode*/, 's'); + + bool bHasZeroDiagonal = false; + Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*unamalgA, bHasZeroDiagonal, STS::magnitude(0.5)); + + TEUCHOS_TEST_FOR_EXCEPTION(dirOrNot.size() != dofStatus.size(), MueLu::Exceptions::RuntimeError, "MueLu::UnsmooshFactory::Build: inconsistent number of coarse DBC array and dofStatus array. dirOrNot.size() = " << dirOrNot.size() << " dofStatus.size() = " << dofStatus.size()); + for (decltype(dirOrNot.size()) i = 0; i < dirOrNot.size(); ++i) { + if (dirOrNot[i] == true) dofStatus[i] = 'p'; } + } - // TODO: TAW the following check is invalid for SA-AMG based input prolongators - //TEUCHOS_TEST_FOR_EXCEPTION(amalgP->getDomainMap()->isSameAs(*amalgP->getColMap()) == false, MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: only support for non-overlapping aggregates. 
(column map of Ptent must be the same as domain map of Ptent)"); - - // extract CRS information from amalgamated prolongation operator - Teuchos::ArrayRCP amalgRowPtr(amalgP->getLocalNumRows()); - Teuchos::ArrayRCP amalgCols(amalgP->getLocalNumEntries()); - Teuchos::ArrayRCP amalgVals(amalgP->getLocalNumEntries()); - Teuchos::RCP amalgPwrap = Teuchos::rcp_dynamic_cast(amalgP); - Teuchos::RCP amalgPcrs = amalgPwrap->getCrsMatrix(); - amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals); - - // calculate number of dof rows for new prolongator - size_t paddedNrows = amalgP->getRowMap()->getLocalNumElements() * Teuchos::as(maxDofPerNode); - - // reserve CSR arrays for new prolongation operator - Teuchos::ArrayRCP newPRowPtr(paddedNrows+1); - Teuchos::ArrayRCP newPCols(amalgP->getLocalNumEntries() * maxDofPerNode); - Teuchos::ArrayRCP newPVals(amalgP->getLocalNumEntries() * maxDofPerNode); - - size_t rowCount = 0; // actual number of (local) in unamalgamated prolongator - if(fineIsPadded == true || fineLevel.GetLevelID() > 0) { - - // build prolongation operator for padded fine level matrices. - // Note: padded fine level dofs are transferred by injection. - // That is, these interpolation stencils do not take averages of - // coarse level variables. Further, fine level Dirichlet points - // also use injection. 
- - size_t cnt = 0; // local id counter - for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { - // determine number of entries in amalgamated dof row i - size_t rowLength = amalgRowPtr[i+1] - amalgRowPtr[i]; - - // loop over dofs per node (unamalgamation) - for(int j = 0; j < maxDofPerNode; j++) { - newPRowPtr[i*maxDofPerNode+j] = cnt; - if (dofStatus[i*maxDofPerNode+j] == 's') { // add only "standard" dofs to unamalgamated prolongator - // loop over column entries in amalgamated P - for (size_t k = 0; k < rowLength; k++) { - newPCols[cnt ] = amalgCols[k+amalgRowPtr[i]] * maxDofPerNode + j; - newPVals[cnt++] = amalgVals[k+amalgRowPtr[i]]; - } - + // TODO: TAW the following check is invalid for SA-AMG based input prolongators + // TEUCHOS_TEST_FOR_EXCEPTION(amalgP->getDomainMap()->isSameAs(*amalgP->getColMap()) == false, MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: only support for non-overlapping aggregates. (column map of Ptent must be the same as domain map of Ptent)"); + + // extract CRS information from amalgamated prolongation operator + Teuchos::ArrayRCP amalgRowPtr(amalgP->getLocalNumRows()); + Teuchos::ArrayRCP amalgCols(amalgP->getLocalNumEntries()); + Teuchos::ArrayRCP amalgVals(amalgP->getLocalNumEntries()); + Teuchos::RCP amalgPwrap = Teuchos::rcp_dynamic_cast(amalgP); + Teuchos::RCP amalgPcrs = amalgPwrap->getCrsMatrix(); + amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals); + + // calculate number of dof rows for new prolongator + size_t paddedNrows = amalgP->getRowMap()->getLocalNumElements() * Teuchos::as(maxDofPerNode); + + // reserve CSR arrays for new prolongation operator + Teuchos::ArrayRCP newPRowPtr(paddedNrows + 1); + Teuchos::ArrayRCP newPCols(amalgP->getLocalNumEntries() * maxDofPerNode); + Teuchos::ArrayRCP newPVals(amalgP->getLocalNumEntries() * maxDofPerNode); + + size_t rowCount = 0; // actual number of (local) in unamalgamated prolongator + if (fineIsPadded == true || 
fineLevel.GetLevelID() > 0) { + // build prolongation operator for padded fine level matrices. + // Note: padded fine level dofs are transferred by injection. + // That is, these interpolation stencils do not take averages of + // coarse level variables. Further, fine level Dirichlet points + // also use injection. + + size_t cnt = 0; // local id counter + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + // determine number of entries in amalgamated dof row i + size_t rowLength = amalgRowPtr[i + 1] - amalgRowPtr[i]; + + // loop over dofs per node (unamalgamation) + for (int j = 0; j < maxDofPerNode; j++) { + newPRowPtr[i * maxDofPerNode + j] = cnt; + if (dofStatus[i * maxDofPerNode + j] == 's') { // add only "standard" dofs to unamalgamated prolongator + // loop over column entries in amalgamated P + for (size_t k = 0; k < rowLength; k++) { + newPCols[cnt] = amalgCols[k + amalgRowPtr[i]] * maxDofPerNode + j; + newPVals[cnt++] = amalgVals[k + amalgRowPtr[i]]; } } } + } - newPRowPtr[paddedNrows] = cnt; // close row CSR array - rowCount = paddedNrows; - } else { - // Build prolongation operator for non-padded fine level matrices. - // Need to map from non-padded dofs to padded dofs. For this, look - // at the status array and skip padded dofs. 
- - size_t cnt = 0; // local id counter - - for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { - // determine number of entries in amalgamated dof row i - size_t rowLength = amalgRowPtr[i+1] - amalgRowPtr[i]; - - // loop over dofs per node (unamalgamation) - for(int j = 0; j < maxDofPerNode; j++) { - // no interpolation for padded fine dofs as they do not exist - - if (dofStatus[i*maxDofPerNode+j] == 's') { // add only "standard" dofs to unamalgamated prolongator - newPRowPtr[rowCount++] = cnt; - // loop over column entries in amalgamated P - for (size_t k = 0; k < rowLength; k++) { - newPCols[cnt ] = amalgCols[k+amalgRowPtr[i]] * maxDofPerNode + j; - newPVals[cnt++] = amalgVals[k+amalgRowPtr[i]]; - } - - } - if (dofStatus[i*maxDofPerNode+j] == 'd') { // Dirichlet handling - newPRowPtr[rowCount++] = cnt; + newPRowPtr[paddedNrows] = cnt; // close row CSR array + rowCount = paddedNrows; + } else { + // Build prolongation operator for non-padded fine level matrices. + // Need to map from non-padded dofs to padded dofs. For this, look + // at the status array and skip padded dofs. 
+ + size_t cnt = 0; // local id counter + + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + // determine number of entries in amalgamated dof row i + size_t rowLength = amalgRowPtr[i + 1] - amalgRowPtr[i]; + + // loop over dofs per node (unamalgamation) + for (int j = 0; j < maxDofPerNode; j++) { + // no interpolation for padded fine dofs as they do not exist + + if (dofStatus[i * maxDofPerNode + j] == 's') { // add only "standard" dofs to unamalgamated prolongator + newPRowPtr[rowCount++] = cnt; + // loop over column entries in amalgamated P + for (size_t k = 0; k < rowLength; k++) { + newPCols[cnt] = amalgCols[k + amalgRowPtr[i]] * maxDofPerNode + j; + newPVals[cnt++] = amalgVals[k + amalgRowPtr[i]]; } } - } - newPRowPtr[rowCount] = cnt; // close row CSR array - } // fineIsPadded == false - - // generate coarse domain map - // So far no support for gid offset or strided maps. This information - // could be gathered easily from the unamalgamated fine level operator A. 
- std::vector stridingInfo(1, maxDofPerNode); - - GlobalOrdinal nCoarseDofs = amalgP->getDomainMap()->getLocalNumElements() * maxDofPerNode; - GlobalOrdinal indexBase = amalgP->getDomainMap()->getIndexBase(); - RCP coarseDomainMap = StridedMapFactory::Build(amalgP->getDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - nCoarseDofs, - indexBase, - stridingInfo, - amalgP->getDomainMap()->getComm(), - -1 /* stridedBlockId */, - 0 /*domainGidOffset */); - - size_t nColCoarseDofs = Teuchos::as(amalgP->getColMap()->getLocalNumElements() * maxDofPerNode); - Teuchos::Array unsmooshColMapGIDs(nColCoarseDofs); - for(size_t c = 0; c < amalgP->getColMap()->getLocalNumElements(); ++c) { - GlobalOrdinal gid = (amalgP->getColMap()->getGlobalElement(c)-indexBase) * maxDofPerNode + indexBase; - - for(int i = 0; i < maxDofPerNode; ++i) { - unsmooshColMapGIDs[c * maxDofPerNode + i] = gid + i; + if (dofStatus[i * maxDofPerNode + j] == 'd') { // Dirichlet handling + newPRowPtr[rowCount++] = cnt; + } } } - Teuchos::RCP coarseColMap = MapFactory::Build(amalgP->getDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - unsmooshColMapGIDs(), //View, - indexBase, - amalgP->getDomainMap()->getComm()); - - // Assemble unamalgamated P - Teuchos::RCP unamalgPCrs = CrsMatrixFactory::Build(unamalgA->getRowMap(), - coarseColMap, - maxDofPerNode*amalgP->getLocalMaxNumRowEntries()); - for (size_t i = 0; i < rowCount; i++) { - unamalgPCrs->insertLocalValues(i, - newPCols.view(newPRowPtr[i], newPRowPtr[i+1] - newPRowPtr[i]), - newPVals.view(newPRowPtr[i], newPRowPtr[i+1] - newPRowPtr[i])); + newPRowPtr[rowCount] = cnt; // close row CSR array + } // fineIsPadded == false + + // generate coarse domain map + // So far no support for gid offset or strided maps. This information + // could be gathered easily from the unamalgamated fine level operator A. 
+ std::vector stridingInfo(1, maxDofPerNode); + + GlobalOrdinal nCoarseDofs = amalgP->getDomainMap()->getLocalNumElements() * maxDofPerNode; + GlobalOrdinal indexBase = amalgP->getDomainMap()->getIndexBase(); + RCP coarseDomainMap = StridedMapFactory::Build(amalgP->getDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + nCoarseDofs, + indexBase, + stridingInfo, + amalgP->getDomainMap()->getComm(), + -1 /* stridedBlockId */, + 0 /*domainGidOffset */); + + size_t nColCoarseDofs = Teuchos::as(amalgP->getColMap()->getLocalNumElements() * maxDofPerNode); + Teuchos::Array unsmooshColMapGIDs(nColCoarseDofs); + for (size_t c = 0; c < amalgP->getColMap()->getLocalNumElements(); ++c) { + GlobalOrdinal gid = (amalgP->getColMap()->getGlobalElement(c) - indexBase) * maxDofPerNode + indexBase; + + for (int i = 0; i < maxDofPerNode; ++i) { + unsmooshColMapGIDs[c * maxDofPerNode + i] = gid + i; } - unamalgPCrs->fillComplete(coarseDomainMap, unamalgA->getRowMap()); - - Teuchos::RCP unamalgP = Teuchos::rcp(new CrsMatrixWrap(unamalgPCrs)); - - Set(coarseLevel,"P",unamalgP); } + Teuchos::RCP coarseColMap = MapFactory::Build(amalgP->getDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + unsmooshColMapGIDs(), // View, + indexBase, + amalgP->getDomainMap()->getComm()); + + // Assemble unamalgamated P + Teuchos::RCP unamalgPCrs = CrsMatrixFactory::Build(unamalgA->getRowMap(), + coarseColMap, + maxDofPerNode * amalgP->getLocalMaxNumRowEntries()); + for (size_t i = 0; i < rowCount; i++) { + unamalgPCrs->insertLocalValues(i, + newPCols.view(newPRowPtr[i], newPRowPtr[i + 1] - newPRowPtr[i]), + newPVals.view(newPRowPtr[i], newPRowPtr[i + 1] - newPRowPtr[i])); + } + unamalgPCrs->fillComplete(coarseDomainMap, unamalgA->getRowMap()); + Teuchos::RCP unamalgP = Teuchos::rcp(new CrsMatrixWrap(unamalgPCrs)); -} /* MueLu */ + Set(coarseLevel, "P", unamalgP); +} +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DEF_HPP_ */ diff --git 
a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp index 51a333e52d81..319fbfcc56eb 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp @@ -47,7 +47,6 @@ #ifndef PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ - #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_VariableDofLaplacianFactory_fwd.hpp" @@ -57,309 +56,295 @@ namespace MueLu { - /*! - @class VariableDofLaplacianFactory class. - @brief Factory for building scalar Laplace operator (that is used as fake operator for variable dof size problems) +/*! + @class VariableDofLaplacianFactory class. + @brief Factory for building scalar Laplace operator (that is used as fake operator for variable dof size problems) - Build distance Laplacian associated with input matrix A (which might have a variable number of DOFs per node). - Coordinates are needed to calculate the distance laplacian values. The user-provided array "DofPresent" stores whether - an array is present (=1) or not (=0) in the matrix. The length of the array is number of nodes * maxDofPerNode and - therefore it might be larger or equal than the number of rows in the input matrix. + Build distance Laplacian associated with input matrix A (which might have a variable number of DOFs per node). + Coordinates are needed to calculate the distance laplacian values. The user-provided array "DofPresent" stores whether + an array is present (=1) or not (=0) in the matrix. The length of the array is number of nodes * maxDofPerNode and + therefore it might be larger or equal than the number of rows in the input matrix. 
- The factory produces the distance laplacian matrix A as output (with one dof per node) as well as the coarse version - of the DofStatus (needed for the next coarser level), containing information about (artificial) Dirichlet rows in the matrix. + The factory produces the distance laplacian matrix A as output (with one dof per node) as well as the coarse version + of the DofStatus (needed for the next coarser level), containing information about (artificial) Dirichlet rows in the matrix. - @ingroup MueLuGraphClasses + @ingroup MueLuGraphClasses - ## Input/output of VariableDofLaplacianFactory ## + ## Input/output of VariableDofLaplacianFactory ## - ### User parameters of VariableDofLaplacianFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the input matrix A with potentially variable number of DOFs. Might be padded or non-padded. Padded means, that the matrix has additional artificial rows and columns to have a constant number of DOFs per node. - Coordinates | Factory | null | | * | * | Generating factory for Coordinates needed for building distance laplacian. - DofPresent | Teuchos::ArrayRCP | NoFactory | | | (*) | Optional array containing information whether DOF is actually present in matrix or not. 
- Advanced Dirichlet: threshold | double | 1e-5 | | * | | Drop tolerance for Dirichlet detection - Variable DOF amalgamation: threshold | double | 1.8e-9 | | * | | Drop tolerance for amalgamation process - maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node + ### User parameters of VariableDofLaplacianFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the input matrix A with potentially variable number of DOFs. Might be padded or non-padded. Padded means, that the matrix has additional artificial rows and columns to have a constant number of DOFs per node. + Coordinates | Factory | null | | * | * | Generating factory for Coordinates needed for building distance laplacian. + DofPresent | Teuchos::ArrayRCP | NoFactory | | | (*) | Optional array containing information whether DOF is actually present in matrix or not. + Advanced Dirichlet: threshold | double | 1e-5 | | * | | Drop tolerance for Dirichlet detection + Variable DOF amalgamation: threshold | double | 1.8e-9 | | * | | Drop tolerance for amalgamation process + maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see VariableDofLaplacianFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see VariableDofLaplacianFactory::DeclareInput). + The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
+ The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see VariableDofLaplacianFactory::GetValidParameters).
+ The * in the @c requested column states that the data is requested as input with all dependencies (see VariableDofLaplacianFactory::DeclareInput). - ### Variables provided by VariableDofLaplacianFactory ### + ### Variables provided by VariableDofLaplacianFactory ### - After TentativePFactory::Build the following data is available (if requested) + After TentativePFactory::Build the following data is available (if requested) - Parameter | generated by | description - ----------|--------------|------------ - | A | VariableDofLaplacianFactory | Laplacian operator - | DofStatus | VariableDofLaplacianFactory | Status array for next coarse level - */ - template - class VariableDofLaplacianFactory : public SingleLevelFactoryBase { + Parameter | generated by | description + ----------|--------------|------------ + | A | VariableDofLaplacianFactory | Laplacian operator + | DofStatus | VariableDofLaplacianFactory | Status array for next coarse level +*/ +template +class VariableDofLaplacianFactory : public SingleLevelFactoryBase { #undef MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - VariableDofLaplacianFactory(); + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor - virtual ~VariableDofLaplacianFactory() { } + //! Constructor + VariableDofLaplacianFactory(); - RCP GetValidParameterList() const; + //! Destructor + virtual ~VariableDofLaplacianFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level ¤tLevel) const; + //! 
Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - void Build(Level ¤tLevel) const; // Build + //@} - private: + void Build(Level& currentLevel) const; // Build - void buildPaddedMap(const Teuchos::ArrayRCP & dofPresent, std::vector & map, size_t nDofs) const; - void assignGhostLocalNodeIds(const Teuchos::RCP & rowDofMap, const Teuchos::RCP & colDofMap, std::vector & myLocalNodeIds, const std::vector & dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP< const Teuchos::Comm< int > > comm) const; - void squeezeOutNnzs(Teuchos::ArrayRCP & rowPtr, Teuchos::ArrayRCP & cols, Teuchos::ArrayRCP & vals, const std::vector& keep) const; - void buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> > & ghostedCoords) const; + private: + void buildPaddedMap(const Teuchos::ArrayRCP& dofPresent, std::vector& map, size_t nDofs) const; + void assignGhostLocalNodeIds(const Teuchos::RCP& rowDofMap, const Teuchos::RCP& colDofMap, std::vector& myLocalNodeIds, const std::vector& dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP > comm) const; + void squeezeOutNnzs(Teuchos::ArrayRCP& rowPtr, Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const std::vector& keep) const; + void buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> >& ghostedCoords) const; - template - void MueLu_az_sort(listType list[], size_t N, size_t list2[], Scalar list3[]) const { - /* local variables */ + template + void MueLu_az_sort(listType list[], size_t N, size_t list2[], Scalar list3[]) const { + /* local variables */ - listType RR, K; - size_t l, r, j, i; - int flag; - size_t RR2; - Scalar RR3; + listType RR, K; + size_t l, r, j, i; + int flag; + 
size_t RR2; + Scalar RR3; - /*********************** execution begins ******************************/ + /*********************** execution begins ******************************/ - if (N <= 1) return; + if (N <= 1) return; - l = N / 2 + 1; - r = N - 1; - l = l - 1; - RR = list[l - 1]; - K = list[l - 1]; + l = N / 2 + 1; + r = N - 1; + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; - if ((list2 != NULL) && (list3 != NULL)) { - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; + if ((list2 != NULL) && (list3 != NULL)) { + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; - while (flag == 1) { - i = j; - j = j + j; + while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list2[i - 1] = list2[j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } - - list[ i - 1] = RR; - list2[i - 1] = RR2; - list3[i - 1] = RR3; - - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - RR3 = list3[r]; - - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - K = list[l - 1]; - } } - list[ 0] = RR; - list2[0] = RR2; - list3[0] = RR3; + list[i - 1] = RR; + list2[i - 1] = RR2; + list3[i - 1] = RR3; + + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + RR3 = list3[r]; + + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; + } } - else if (list2 != NULL) { - RR2 = list2[l - 1]; 
- while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + list3[0] = RR3; + } else if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; - if (j > r + 1) + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - - list[ i - 1] = RR; - list2[i - 1] = RR2; - - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR2 = list2[l - 1]; - K = list[l - 1]; - } } - list[ 0] = RR; - list2[0] = RR2; + list[i - 1] = RR; + list2[i - 1] = RR2; + + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + K = list[l - 1]; + } } - else if (list3 != NULL) { - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + } else if (list3 != NULL) { + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } + } - list[ i - 1] = RR; - list3[i - 1] = 
RR3; - - if (l == 1) { - RR = list [r]; - RR3 = list3[r]; - - K = list[r]; - list[r ] = list[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR3 = list3[l - 1]; - K = list[l - 1]; - } + list[i - 1] = RR; + list3[i - 1] = RR3; + + if (l == 1) { + RR = list[r]; + RR3 = list3[r]; + + K = list[r]; + list[r] = list[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; } + } - list[ 0] = RR; - list3[0] = RR3; + list[0] = RR; + list3[0] = RR3; - } - else { - while (r != 0) { - j = l; - flag = 1; + } else { + while (r != 0) { + j = l; + flag = 1; - while (flag == 1) { - i = j; - j = j + j; + while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - } - else { - flag = 0; - } } } + } - list[ i - 1] = RR; + list[i - 1] = RR; - if (l == 1) { - RR = list [r]; + if (l == 1) { + RR = list[r]; - K = list[r]; - list[r ] = list[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - K = list[l - 1]; - } + K = list[r]; + list[r] = list[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; } - - list[ 0] = RR; } + + list[0] = RR; } + } - }; //class CoalesceDropFactory +}; // class CoalesceDropFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT - #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp index 21aa1c77369d..3742e119387f 100644 --- 
a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp @@ -47,559 +47,551 @@ #ifndef PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ - #include "MueLu_Monitor.hpp" #include "MueLu_VariableDofLaplacianFactory_decl.hpp" namespace MueLu { - template - RCP VariableDofLaplacianFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP VariableDofLaplacianFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set< double > ("Advanced Dirichlet: threshold", 1e-5, "Drop tolerance for Dirichlet detection"); - validParamList->set< double > ("Variable DOF amalgamation: threshold", 1.8e-9, "Drop tolerance for amalgamation process"); - validParamList->set< int > ("maxDofPerNode", 1, "Maximum number of DOFs per node"); + validParamList->set("Advanced Dirichlet: threshold", 1e-5, "Drop tolerance for Dirichlet detection"); + validParamList->set("Variable DOF amalgamation: threshold", 1.8e-9, "Drop tolerance for amalgamation process"); + validParamList->set("maxDofPerNode", 1, "Maximum number of DOFs per node"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - return validParamList; - } + return validParamList; +} - template - VariableDofLaplacianFactory::VariableDofLaplacianFactory() { } +template +VariableDofLaplacianFactory::VariableDofLaplacianFactory() {} - template - void VariableDofLaplacianFactory::DeclareInput(Level ¤tLevel) const { - 
Input(currentLevel, "A"); - Input(currentLevel, "Coordinates"); +template +void VariableDofLaplacianFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Coordinates"); - //if (currentLevel.GetLevelID() == 0) // TODO check for finest level (special treatment) - if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { - currentLevel.DeclareInput("DofPresent", NoFactory::get(), this); - } + // if (currentLevel.GetLevelID() == 0) // TODO check for finest level (special treatment) + if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { + currentLevel.DeclareInput("DofPresent", NoFactory::get(), this); } +} - template - void VariableDofLaplacianFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - typedef Teuchos::ScalarTraits STS; +template +void VariableDofLaplacianFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + typedef Teuchos::ScalarTraits STS; - const ParameterList & pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(currentLevel, "A"); + RCP A = Get >(currentLevel, "A"); - Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); - Xpetra::UnderlyingLib lib = A->getRowMap()->lib(); + Teuchos::RCP > comm = A->getRowMap()->getComm(); + Xpetra::UnderlyingLib lib = A->getRowMap()->lib(); - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> dxMV; - RCP Coords = Get< RCP::magnitudeType,LO,GO,NO> > >(currentLevel, "Coordinates"); + typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> dxMV; + RCP Coords = Get::magnitudeType, LO, GO, NO> > >(currentLevel, "Coordinates"); - int maxDofPerNode = pL.get("maxDofPerNode"); - Scalar dirDropTol = Teuchos::as(pL.get("Advanced Dirichlet: threshold")); // "ML advnaced Dirichlet: threshold" - Scalar amalgDropTol = Teuchos::as(pL.get("Variable DOF amalgamation: threshold")); //"variable DOF amalgamation: threshold") + int 
maxDofPerNode = pL.get("maxDofPerNode"); + Scalar dirDropTol = Teuchos::as(pL.get("Advanced Dirichlet: threshold")); // "ML advnaced Dirichlet: threshold" + Scalar amalgDropTol = Teuchos::as(pL.get("Variable DOF amalgamation: threshold")); //"variable DOF amalgamation: threshold") - bool bHasZeroDiagonal = false; - Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*A,bHasZeroDiagonal,STS::magnitude(dirDropTol)); + bool bHasZeroDiagonal = false; + Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*A, bHasZeroDiagonal, STS::magnitude(dirDropTol)); - // check availability of DofPresent array - Teuchos::ArrayRCP dofPresent; - if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { - dofPresent = currentLevel.Get< Teuchos::ArrayRCP >("DofPresent", NoFactory::get()); - } else { - // TAW: not sure about size of array. We cannot determine the expected size in the non-padded case correctly... - dofPresent = Teuchos::ArrayRCP(A->getRowMap()->getLocalNumElements(),1); - } + // check availability of DofPresent array + Teuchos::ArrayRCP dofPresent; + if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { + dofPresent = currentLevel.Get >("DofPresent", NoFactory::get()); + } else { + // TAW: not sure about size of array. We cannot determine the expected size in the non-padded case correctly... + dofPresent = Teuchos::ArrayRCP(A->getRowMap()->getLocalNumElements(), 1); + } - // map[k] indicates that the kth dof in the variable dof matrix A would - // correspond to the map[k]th dof in the padded system. If, i.e., it is - // map[35] = 39 then dof no 35 in the variable dof matrix A corresponds to - // row map id 39 in an imaginary padded matrix Apadded. - // The padded system is never built but would be the associated matrix if - // every node had maxDofPerNode dofs. 
- std::vector map(A->getLocalNumRows()); - this->buildPaddedMap(dofPresent, map, A->getLocalNumRows()); + // map[k] indicates that the kth dof in the variable dof matrix A would + // correspond to the map[k]th dof in the padded system. If, i.e., it is + // map[35] = 39 then dof no 35 in the variable dof matrix A corresponds to + // row map id 39 in an imaginary padded matrix Apadded. + // The padded system is never built but would be the associated matrix if + // every node had maxDofPerNode dofs. + std::vector map(A->getLocalNumRows()); + this->buildPaddedMap(dofPresent, map, A->getLocalNumRows()); - // map of size of number of DOFs containing local node id (dof id -> node id, inclusive ghosted dofs/nodes) - std::vector myLocalNodeIds(A->getColMap()->getLocalNumElements()); // possible maximum (we need the ghost nodes, too) + // map of size of number of DOFs containing local node id (dof id -> node id, inclusive ghosted dofs/nodes) + std::vector myLocalNodeIds(A->getColMap()->getLocalNumElements()); // possible maximum (we need the ghost nodes, too) - // assign the local node ids for the ghosted nodes - size_t nLocalNodes, nLocalPlusGhostNodes; - this->assignGhostLocalNodeIds(A->getRowMap(), A->getColMap(), myLocalNodeIds, map, maxDofPerNode, nLocalNodes, nLocalPlusGhostNodes, comm); + // assign the local node ids for the ghosted nodes + size_t nLocalNodes, nLocalPlusGhostNodes; + this->assignGhostLocalNodeIds(A->getRowMap(), A->getColMap(), myLocalNodeIds, map, maxDofPerNode, nLocalNodes, nLocalPlusGhostNodes, comm); - //RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)," ",0,false,10,false, true); + // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)," ",0,false,10,false, true); - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(dofPresent.size()) != Teuchos::as(nLocalNodes * maxDofPerNode),MueLu::Exceptions::RuntimeError,"VariableDofLaplacianFactory: size of provided DofPresent array is " << dofPresent.size() << " but should be " << 
nLocalNodes * maxDofPerNode << " on the current processor."); + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(dofPresent.size()) != Teuchos::as(nLocalNodes * maxDofPerNode), MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory: size of provided DofPresent array is " << dofPresent.size() << " but should be " << nLocalNodes * maxDofPerNode << " on the current processor."); - // put content of assignGhostLocalNodeIds here... + // put content of assignGhostLocalNodeIds here... - // fill nodal maps + // fill nodal maps - Teuchos::ArrayView< const GlobalOrdinal > myGids = A->getColMap()->getLocalElementList(); + Teuchos::ArrayView myGids = A->getColMap()->getLocalElementList(); - // vector containing row/col gids of amalgamated matrix (with holes) + // vector containing row/col gids of amalgamated matrix (with holes) - size_t nLocalDofs = A->getRowMap()->getLocalNumElements(); - size_t nLocalPlusGhostDofs = A->getColMap()->getLocalNumElements(); + size_t nLocalDofs = A->getRowMap()->getLocalNumElements(); + size_t nLocalPlusGhostDofs = A->getColMap()->getLocalNumElements(); - // myLocalNodeIds (dof -> node) + // myLocalNodeIds (dof -> node) - Teuchos::Array amalgRowMapGIDs(nLocalNodes); - Teuchos::Array amalgColMapGIDs(nLocalPlusGhostNodes); + Teuchos::Array amalgRowMapGIDs(nLocalNodes); + Teuchos::Array amalgColMapGIDs(nLocalPlusGhostNodes); - // initialize - size_t count = 0; - if (nLocalDofs > 0) { - amalgRowMapGIDs[count] = myGids[0]; - amalgColMapGIDs[count] = myGids[0]; + // initialize + size_t count = 0; + if (nLocalDofs > 0) { + amalgRowMapGIDs[count] = myGids[0]; + amalgColMapGIDs[count] = myGids[0]; + count++; + } + + for (size_t i = 1; i < nLocalDofs; i++) { + if (myLocalNodeIds[i] != myLocalNodeIds[i - 1]) { + amalgRowMapGIDs[count] = myGids[i]; + amalgColMapGIDs[count] = myGids[i]; count++; } + } - for(size_t i = 1; i < nLocalDofs; i++) { - if (myLocalNodeIds[i] != myLocalNodeIds[i-1]) { - amalgRowMapGIDs[count] = myGids[i]; - amalgColMapGIDs[count] = 
myGids[i]; - count++; - } - } + RCP tempAmalgColVec = GOVectorFactory::Build(A->getDomainMap()); + { + Teuchos::ArrayRCP tempAmalgColVecData = tempAmalgColVec->getDataNonConst(0); + for (size_t i = 0; i < A->getDomainMap()->getLocalNumElements(); i++) + tempAmalgColVecData[i] = amalgColMapGIDs[myLocalNodeIds[i]]; + } - RCP tempAmalgColVec = GOVectorFactory::Build(A->getDomainMap()); - { - Teuchos::ArrayRCP tempAmalgColVecData = tempAmalgColVec->getDataNonConst(0); - for (size_t i = 0; i < A->getDomainMap()->getLocalNumElements(); i++) - tempAmalgColVecData[i] = amalgColMapGIDs[ myLocalNodeIds[i]]; - } + RCP tempAmalgColVecTarget = GOVectorFactory::Build(A->getColMap()); + Teuchos::RCP dofImporter = ImportFactory::Build(A->getDomainMap(), A->getColMap()); + tempAmalgColVecTarget->doImport(*tempAmalgColVec, *dofImporter, Xpetra::INSERT); - RCP tempAmalgColVecTarget = GOVectorFactory::Build(A->getColMap()); - Teuchos::RCP dofImporter = ImportFactory::Build(A->getDomainMap(), A->getColMap()); - tempAmalgColVecTarget->doImport(*tempAmalgColVec, *dofImporter, Xpetra::INSERT); + { + Teuchos::ArrayRCP tempAmalgColVecBData = tempAmalgColVecTarget->getData(0); + // copy from dof vector to nodal vector + for (size_t i = 0; i < myLocalNodeIds.size(); i++) + amalgColMapGIDs[myLocalNodeIds[i]] = tempAmalgColVecBData[i]; + } - { - Teuchos::ArrayRCP tempAmalgColVecBData = tempAmalgColVecTarget->getData(0); - // copy from dof vector to nodal vector - for (size_t i = 0; i < myLocalNodeIds.size(); i++) - amalgColMapGIDs[ myLocalNodeIds[i]] = tempAmalgColVecBData[i]; + Teuchos::RCP amalgRowMap = MapFactory::Build(lib, + Teuchos::OrdinalTraits::invalid(), + amalgRowMapGIDs(), // View, + A->getRowMap()->getIndexBase(), + comm); + + Teuchos::RCP amalgColMap = MapFactory::Build(lib, + Teuchos::OrdinalTraits::invalid(), + amalgColMapGIDs(), // View, + A->getRangeMap()->getIndexBase(), + comm); + + // end fill nodal maps + + // start variable dof amalgamation + + Teuchos::RCP Awrap = 
Teuchos::rcp_dynamic_cast(A); + Teuchos::RCP Acrs = Awrap->getCrsMatrix(); + // Acrs->describe(*fancy, Teuchos::VERB_EXTREME); + + size_t nNonZeros = 0; + std::vector isNonZero(nLocalPlusGhostDofs, false); + std::vector nonZeroList(nLocalPlusGhostDofs); // ??? + + // also used in DetectDirichletExt + Teuchos::RCP diagVecUnique = VectorFactory::Build(A->getRowMap()); + Teuchos::RCP diagVec = VectorFactory::Build(A->getColMap()); + A->getLocalDiagCopy(*diagVecUnique); + diagVec->doImport(*diagVecUnique, *dofImporter, Xpetra::INSERT); + Teuchos::ArrayRCP diagVecData = diagVec->getData(0); + + Teuchos::ArrayRCP rowptr(Acrs->getLocalNumRows()); + Teuchos::ArrayRCP colind(Acrs->getLocalNumEntries()); + Teuchos::ArrayRCP values(Acrs->getLocalNumEntries()); + Acrs->getAllValues(rowptr, colind, values); + + // create arrays for amalgamated matrix + Teuchos::ArrayRCP amalgRowPtr(nLocalNodes + 1); + Teuchos::ArrayRCP amalgCols(rowptr[rowptr.size() - 1]); + + LocalOrdinal oldBlockRow = 0; + LocalOrdinal blockRow = 0; + LocalOrdinal blockColumn = 0; + + size_t newNzs = 0; + amalgRowPtr[0] = newNzs; + + bool doNotDrop = false; + if (amalgDropTol == Teuchos::ScalarTraits::zero()) doNotDrop = true; + if (values.size() == 0) doNotDrop = true; + + for (decltype(rowptr.size()) i = 0; i < rowptr.size() - 1; i++) { + blockRow = std::floor(map[i] / maxDofPerNode); + if (blockRow != oldBlockRow) { + // zero out info recording nonzeros in oldBlockRow + for (size_t j = 0; j < nNonZeros; j++) isNonZero[nonZeroList[j]] = false; + nNonZeros = 0; + amalgRowPtr[blockRow] = newNzs; // record start of next row } - - Teuchos::RCP amalgRowMap = MapFactory::Build(lib, - Teuchos::OrdinalTraits::invalid(), - amalgRowMapGIDs(), //View, - A->getRowMap()->getIndexBase(), - comm); - - Teuchos::RCP amalgColMap = MapFactory::Build(lib, - Teuchos::OrdinalTraits::invalid(), - amalgColMapGIDs(), //View, - A->getRangeMap()->getIndexBase(), - comm); - - // end fill nodal maps - - - // start variable dof 
amalgamation - - Teuchos::RCP Awrap = Teuchos::rcp_dynamic_cast(A); - Teuchos::RCP Acrs = Awrap->getCrsMatrix(); - //Acrs->describe(*fancy, Teuchos::VERB_EXTREME); - - size_t nNonZeros = 0; - std::vector isNonZero(nLocalPlusGhostDofs,false); - std::vector nonZeroList(nLocalPlusGhostDofs); // ??? - - // also used in DetectDirichletExt - Teuchos::RCP diagVecUnique = VectorFactory::Build(A->getRowMap()); - Teuchos::RCP diagVec = VectorFactory::Build(A->getColMap()); - A->getLocalDiagCopy(*diagVecUnique); - diagVec->doImport(*diagVecUnique, *dofImporter, Xpetra::INSERT); - Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0); - - Teuchos::ArrayRCP rowptr(Acrs->getLocalNumRows()); - Teuchos::ArrayRCP colind(Acrs->getLocalNumEntries()); - Teuchos::ArrayRCP values(Acrs->getLocalNumEntries()); - Acrs->getAllValues(rowptr, colind, values); - - - // create arrays for amalgamated matrix - Teuchos::ArrayRCP amalgRowPtr(nLocalNodes+1); - Teuchos::ArrayRCP amalgCols(rowptr[rowptr.size()-1]); - - LocalOrdinal oldBlockRow = 0; - LocalOrdinal blockRow = 0; - LocalOrdinal blockColumn = 0; - - size_t newNzs = 0; - amalgRowPtr[0] = newNzs; - - bool doNotDrop = false; - if (amalgDropTol == Teuchos::ScalarTraits::zero()) doNotDrop = true; - if (values.size() == 0) doNotDrop = true; - - for(decltype(rowptr.size()) i = 0; i < rowptr.size()-1; i++) { - blockRow = std::floor( map[i] / maxDofPerNode); - if (blockRow != oldBlockRow) { - // zero out info recording nonzeros in oldBlockRow - for(size_t j = 0; j < nNonZeros; j++) isNonZero[nonZeroList[j]] = false; - nNonZeros = 0; - amalgRowPtr[blockRow] = newNzs; // record start of next row - } - for (size_t j = rowptr[i]; j < rowptr[i+1]; j++) { - if(doNotDrop == true || - ( STS::magnitude(values[j] / STS::magnitude(sqrt(STS::magnitude(diagVecData[i]) * STS::magnitude(diagVecData[colind[j]]))) ) >= STS::magnitude(amalgDropTol) )) { - blockColumn = myLocalNodeIds[colind[j]]; - if(isNonZero[blockColumn] == false) { - 
isNonZero[blockColumn] = true; - nonZeroList[nNonZeros++] = blockColumn; - amalgCols[newNzs++] = blockColumn; - } + for (size_t j = rowptr[i]; j < rowptr[i + 1]; j++) { + if (doNotDrop == true || + (STS::magnitude(values[j] / STS::magnitude(sqrt(STS::magnitude(diagVecData[i]) * STS::magnitude(diagVecData[colind[j]])))) >= STS::magnitude(amalgDropTol))) { + blockColumn = myLocalNodeIds[colind[j]]; + if (isNonZero[blockColumn] == false) { + isNonZero[blockColumn] = true; + nonZeroList[nNonZeros++] = blockColumn; + amalgCols[newNzs++] = blockColumn; } } - oldBlockRow = blockRow; } - amalgRowPtr[blockRow+1] = newNzs; - - TEUCHOS_TEST_FOR_EXCEPTION((blockRow+1 != Teuchos::as(nLocalNodes)) && (nLocalNodes !=0), MueLu::Exceptions::RuntimeError, "VariableDofsPerNodeAmalgamation: error, computed # block rows (" << blockRow+1 <<") != nLocalNodes (" << nLocalNodes <<")"); - - amalgCols.resize(amalgRowPtr[nLocalNodes]); - - // end variableDofAmalg - - // begin rm differentDofsCrossings - - // Remove matrix entries (i,j) where the ith node and the jth node have - // different dofs that are 'present' - // Specifically, on input: - // dofPresent[i*maxDofPerNode+k] indicates whether or not the kth - // dof at the ith node is present in the - // variable dof matrix (e.g., the ith node - // has an air pressure dof). true means - // the dof is present while false means it - // is not. - // We create a unique id for the ith node (i.e. 
uniqueId[i]) via - // sum_{k=0 to maxDofPerNode-1} dofPresent[i*maxDofPerNode+k]*2^k - // and use this unique idea to remove entries (i,j) when uniqueId[i]!=uniqueId[j] - - Teuchos::ArrayRCP uniqueId(nLocalPlusGhostNodes); // unique id associated with DOF - std::vector keep(amalgRowPtr[amalgRowPtr.size()-1],true); // keep connection associated with node - - size_t ii = 0; // iteration index for present dofs - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - LocalOrdinal temp = 1; // basis for dof-id - uniqueId[i] = 0; - for (decltype(maxDofPerNode) j = 0; j < maxDofPerNode; j++) { - if (dofPresent[ii++]) uniqueId[i] += temp; // encode dof to be present - temp = temp * 2; // check next dof - } + oldBlockRow = blockRow; + } + amalgRowPtr[blockRow + 1] = newNzs; + + TEUCHOS_TEST_FOR_EXCEPTION((blockRow + 1 != Teuchos::as(nLocalNodes)) && (nLocalNodes != 0), MueLu::Exceptions::RuntimeError, "VariableDofsPerNodeAmalgamation: error, computed # block rows (" << blockRow + 1 << ") != nLocalNodes (" << nLocalNodes << ")"); + + amalgCols.resize(amalgRowPtr[nLocalNodes]); + + // end variableDofAmalg + + // begin rm differentDofsCrossings + + // Remove matrix entries (i,j) where the ith node and the jth node have + // different dofs that are 'present' + // Specifically, on input: + // dofPresent[i*maxDofPerNode+k] indicates whether or not the kth + // dof at the ith node is present in the + // variable dof matrix (e.g., the ith node + // has an air pressure dof). true means + // the dof is present while false means it + // is not. + // We create a unique id for the ith node (i.e. 
uniqueId[i]) via + // sum_{k=0 to maxDofPerNode-1} dofPresent[i*maxDofPerNode+k]*2^k + // and use this unique idea to remove entries (i,j) when uniqueId[i]!=uniqueId[j] + + Teuchos::ArrayRCP uniqueId(nLocalPlusGhostNodes); // unique id associated with DOF + std::vector keep(amalgRowPtr[amalgRowPtr.size() - 1], true); // keep connection associated with node + + size_t ii = 0; // iteration index for present dofs + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + LocalOrdinal temp = 1; // basis for dof-id + uniqueId[i] = 0; + for (decltype(maxDofPerNode) j = 0; j < maxDofPerNode; j++) { + if (dofPresent[ii++]) uniqueId[i] += temp; // encode dof to be present + temp = temp * 2; // check next dof } + } - Teuchos::RCP nodeImporter = ImportFactory::Build(amalgRowMap, amalgColMap); + Teuchos::RCP nodeImporter = ImportFactory::Build(amalgRowMap, amalgColMap); - RCP nodeIdSrc = Xpetra::VectorFactory::Build(amalgRowMap,true); - RCP nodeIdTarget = Xpetra::VectorFactory::Build(amalgColMap,true); + RCP nodeIdSrc = Xpetra::VectorFactory::Build(amalgRowMap, true); + RCP nodeIdTarget = Xpetra::VectorFactory::Build(amalgColMap, true); - Teuchos::ArrayRCP< LocalOrdinal > nodeIdSrcData = nodeIdSrc->getDataNonConst(0); - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - nodeIdSrcData[i] = uniqueId[i]; - } + Teuchos::ArrayRCP nodeIdSrcData = nodeIdSrc->getDataNonConst(0); + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + nodeIdSrcData[i] = uniqueId[i]; + } - nodeIdTarget->doImport(*nodeIdSrc, *nodeImporter, Xpetra::INSERT); + nodeIdTarget->doImport(*nodeIdSrc, *nodeImporter, Xpetra::INSERT); - Teuchos::ArrayRCP< const LocalOrdinal > nodeIdTargetData = nodeIdTarget->getData(0); - for(decltype(uniqueId.size()) i = 0; i < uniqueId.size(); i++) { - uniqueId[i] = nodeIdTargetData[i]; - } + Teuchos::ArrayRCP nodeIdTargetData = nodeIdTarget->getData(0); + for (decltype(uniqueId.size()) i = 0; i < 
uniqueId.size(); i++) { + uniqueId[i] = nodeIdTargetData[i]; + } - // nodal comm uniqueId, myLocalNodeIds + // nodal comm uniqueId, myLocalNodeIds - // uniqueId now should contain ghosted data + // uniqueId now should contain ghosted data - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - for(size_t j = amalgRowPtr[i]; j < amalgRowPtr[i+1]; j++) { - if (uniqueId[i] != uniqueId[amalgCols[j]]) keep [j] = false; - } + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + for (size_t j = amalgRowPtr[i]; j < amalgRowPtr[i + 1]; j++) { + if (uniqueId[i] != uniqueId[amalgCols[j]]) keep[j] = false; } + } - // squeeze out hard-coded zeros from CSR arrays - Teuchos::ArrayRCP amalgVals; - this->squeezeOutNnzs(amalgRowPtr,amalgCols,amalgVals,keep); + // squeeze out hard-coded zeros from CSR arrays + Teuchos::ArrayRCP amalgVals; + this->squeezeOutNnzs(amalgRowPtr, amalgCols, amalgVals, keep); - typedef Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO> dxMVf; - RCP ghostedCoords = dxMVf::Build(amalgColMap,Coords->getNumVectors()); + typedef Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO> dxMVf; + RCP ghostedCoords = dxMVf::Build(amalgColMap, Coords->getNumVectors()); - TEUCHOS_TEST_FOR_EXCEPTION(amalgRowMap->getLocalNumElements() != Coords->getMap()->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "MueLu::VariableDofLaplacianFactory: the number of Coordinates and amalgamated nodes is inconsistent."); + TEUCHOS_TEST_FOR_EXCEPTION(amalgRowMap->getLocalNumElements() != Coords->getMap()->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "MueLu::VariableDofLaplacianFactory: the number of Coordinates and amalgamated nodes is inconsistent."); - // Coords might live on a special nodeMap with consecutive ids (the natural numbering) - // The amalgRowMap might have the same number of entries, but with holes in the ids. - // e.g. 0,3,6,9,... as GIDs. - // We need the ghosted Coordinates in the buildLaplacian routine. 
But we access the data - // through getData only, i.e., the global ids are not interesting as long as we do not change - // the ordering of the entries - Coords->replaceMap(amalgRowMap); - ghostedCoords->doImport(*Coords, *nodeImporter, Xpetra::INSERT); + // Coords might live on a special nodeMap with consecutive ids (the natural numbering) + // The amalgRowMap might have the same number of entries, but with holes in the ids. + // e.g. 0,3,6,9,... as GIDs. + // We need the ghosted Coordinates in the buildLaplacian routine. But we access the data + // through getData only, i.e., the global ids are not interesting as long as we do not change + // the ordering of the entries + Coords->replaceMap(amalgRowMap); + ghostedCoords->doImport(*Coords, *nodeImporter, Xpetra::INSERT); - Teuchos::ArrayRCP lapVals(amalgRowPtr[nLocalNodes]); - this->buildLaplacian(amalgRowPtr, amalgCols, lapVals, Coords->getNumVectors(), ghostedCoords); + Teuchos::ArrayRCP lapVals(amalgRowPtr[nLocalNodes]); + this->buildLaplacian(amalgRowPtr, amalgCols, lapVals, Coords->getNumVectors(), ghostedCoords); - // sort column GIDs - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - size_t j = amalgRowPtr[i]; - this->MueLu_az_sort(&(amalgCols[j]), amalgRowPtr[i+1] - j, NULL, &(lapVals[j])); - } + // sort column GIDs + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + size_t j = amalgRowPtr[i]; + this->MueLu_az_sort(&(amalgCols[j]), amalgRowPtr[i + 1] - j, NULL, &(lapVals[j])); + } - // Caluclate status array for next level - Teuchos::Array status(nLocalNodes * maxDofPerNode); + // Caluclate status array for next level + Teuchos::Array status(nLocalNodes * maxDofPerNode); - // dir or not Teuchos::ArrayRCP dirOrNot - for(decltype(status.size()) i = 0; i < status.size(); i++) status[i] = 's'; - for(decltype(status.size()) i = 0; i < status.size(); i++) { - if(dofPresent[i] == false) status[i] = 'p'; - } - if(dirOrNot.size() > 0) { - 
for(decltype(map.size()) i = 0; i < map.size(); i++) { - if(dirOrNot[i] == true){ - status[map[i]] = 'd'; - } + // dir or not Teuchos::ArrayRCP dirOrNot + for (decltype(status.size()) i = 0; i < status.size(); i++) status[i] = 's'; + for (decltype(status.size()) i = 0; i < status.size(); i++) { + if (dofPresent[i] == false) status[i] = 'p'; + } + if (dirOrNot.size() > 0) { + for (decltype(map.size()) i = 0; i < map.size(); i++) { + if (dirOrNot[i] == true) { + status[map[i]] = 'd'; } } - Set(currentLevel,"DofStatus",status); - - // end status array - - Teuchos::RCP lapCrsMat = CrsMatrixFactory::Build(amalgRowMap, amalgColMap, 10); // TODO better approx for max nnz per row + } + Set(currentLevel, "DofStatus", status); - for (size_t i = 0; i < nLocalNodes; i++) { - lapCrsMat->insertLocalValues(i, amalgCols.view(amalgRowPtr[i],amalgRowPtr[i+1]-amalgRowPtr[i]), - lapVals.view(amalgRowPtr[i],amalgRowPtr[i+1]-amalgRowPtr[i])); - } - lapCrsMat->fillComplete(amalgRowMap,amalgRowMap); + // end status array - //lapCrsMat->describe(*fancy, Teuchos::VERB_EXTREME); + Teuchos::RCP lapCrsMat = CrsMatrixFactory::Build(amalgRowMap, amalgColMap, 10); // TODO better approx for max nnz per row - Teuchos::RCP lapMat = Teuchos::rcp(new CrsMatrixWrap(lapCrsMat)); - Set(currentLevel,"A",lapMat); + for (size_t i = 0; i < nLocalNodes; i++) { + lapCrsMat->insertLocalValues(i, amalgCols.view(amalgRowPtr[i], amalgRowPtr[i + 1] - amalgRowPtr[i]), + lapVals.view(amalgRowPtr[i], amalgRowPtr[i + 1] - amalgRowPtr[i])); } - - template - void VariableDofLaplacianFactory::buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals,const size_t& numdim, const RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> > & ghostedCoords) const { - TEUCHOS_TEST_FOR_EXCEPTION(numdim != 2 && numdim !=3, MueLu::Exceptions::RuntimeError,"buildLaplacian only works for 2d or 3d examples. 
numdim = " << numdim); - - if(numdim == 2) { // 2d - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > x = ghostedCoords->getData(0); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > y = ghostedCoords->getData(1); - - for(decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { - Scalar sum = Teuchos::ScalarTraits::zero(); - LocalOrdinal diag = -1; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(cols[j] != Teuchos::as(i)){ - vals[j] = std::sqrt( (x[i]-x[cols[j]]) * (x[i]-x[cols[j]]) + - (y[i]-y[cols[j]]) * (y[i]-y[cols[j]]) ); - TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i]); - vals[j] = -Teuchos::ScalarTraits::one()/vals[j]; - sum = sum - vals[j]; - } - else diag = j; - } - if(sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); - TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); - - vals[diag] = sum; + lapCrsMat->fillComplete(amalgRowMap, amalgRowMap); + + // lapCrsMat->describe(*fancy, Teuchos::VERB_EXTREME); + + Teuchos::RCP lapMat = Teuchos::rcp(new CrsMatrixWrap(lapCrsMat)); + Set(currentLevel, "A", lapMat); +} + +template +void VariableDofLaplacianFactory::buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> >& ghostedCoords) const { + TEUCHOS_TEST_FOR_EXCEPTION(numdim != 2 && numdim != 3, MueLu::Exceptions::RuntimeError, "buildLaplacian only works for 2d or 3d examples. 
numdim = " << numdim); + + if (numdim == 2) { // 2d + Teuchos::ArrayRCP::magnitudeType> x = ghostedCoords->getData(0); + Teuchos::ArrayRCP::magnitudeType> y = ghostedCoords->getData(1); + + for (decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { + Scalar sum = Teuchos::ScalarTraits::zero(); + LocalOrdinal diag = -1; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (cols[j] != Teuchos::as(i)) { + vals[j] = std::sqrt((x[i] - x[cols[j]]) * (x[i] - x[cols[j]]) + + (y[i] - y[cols[j]]) * (y[i] - y[cols[j]])); + TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i]); + vals[j] = -Teuchos::ScalarTraits::one() / vals[j]; + sum = sum - vals[j]; + } else + diag = j; } - } else { // 3d - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > x = ghostedCoords->getData(0); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > y = ghostedCoords->getData(1); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > z = ghostedCoords->getData(2); - - for(decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { - Scalar sum = Teuchos::ScalarTraits::zero(); - LocalOrdinal diag = -1; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(cols[j] != Teuchos::as(i)){ - vals[j] = std::sqrt( (x[i]-x[cols[j]]) * (x[i]-x[cols[j]]) + - (y[i]-y[cols[j]]) * (y[i]-y[cols[j]]) + - (z[i]-z[cols[j]]) * (z[i]-z[cols[j]]) ); - - TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i] << " and " << z[i]); - - vals[j] = -Teuchos::ScalarTraits::one()/vals[j]; - sum = sum - vals[j]; - } - else diag = j; - } - if(sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); - 
TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); + if (sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); + TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); - vals[diag] = sum; + vals[diag] = sum; + } + } else { // 3d + Teuchos::ArrayRCP::magnitudeType> x = ghostedCoords->getData(0); + Teuchos::ArrayRCP::magnitudeType> y = ghostedCoords->getData(1); + Teuchos::ArrayRCP::magnitudeType> z = ghostedCoords->getData(2); + + for (decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { + Scalar sum = Teuchos::ScalarTraits::zero(); + LocalOrdinal diag = -1; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (cols[j] != Teuchos::as(i)) { + vals[j] = std::sqrt((x[i] - x[cols[j]]) * (x[i] - x[cols[j]]) + + (y[i] - y[cols[j]]) * (y[i] - y[cols[j]]) + + (z[i] - z[cols[j]]) * (z[i] - z[cols[j]])); + + TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i] << " and " << z[i]); + + vals[j] = -Teuchos::ScalarTraits::one() / vals[j]; + sum = sum - vals[j]; + } else + diag = j; } + if (sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); + TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); + + vals[diag] = sum; } } - - template - void VariableDofLaplacianFactory::squeezeOutNnzs(Teuchos::ArrayRCP & rowPtr, Teuchos::ArrayRCP & cols, Teuchos::ArrayRCP & vals, const std::vector& keep) const { - // get rid of nonzero entries that have 0's in them and properly change - // the row ptr array to reflect this removal (either vals == NULL or vals != NULL) - // Note, the arrays are squeezed. No memory is freed. 
- - size_t count = 0; - - size_t nRows = rowPtr.size()-1; - if(vals.size() > 0) { - for(size_t i = 0; i < nRows; i++) { - size_t newStart = count; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(vals[j] != Teuchos::ScalarTraits::zero()) { - cols[count ] = cols[j]; - vals[count++] = vals[j]; - } +} + +template +void VariableDofLaplacianFactory::squeezeOutNnzs(Teuchos::ArrayRCP& rowPtr, Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const std::vector& keep) const { + // get rid of nonzero entries that have 0's in them and properly change + // the row ptr array to reflect this removal (either vals == NULL or vals != NULL) + // Note, the arrays are squeezed. No memory is freed. + + size_t count = 0; + + size_t nRows = rowPtr.size() - 1; + if (vals.size() > 0) { + for (size_t i = 0; i < nRows; i++) { + size_t newStart = count; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (vals[j] != Teuchos::ScalarTraits::zero()) { + cols[count] = cols[j]; + vals[count++] = vals[j]; } - rowPtr[i] = newStart; } - } else { - for (size_t i = 0; i < nRows; i++) { - size_t newStart = count; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if (keep[j] == true) { - cols[count++] = cols[j]; - } + rowPtr[i] = newStart; + } + } else { + for (size_t i = 0; i < nRows; i++) { + size_t newStart = count; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (keep[j] == true) { + cols[count++] = cols[j]; } - rowPtr[i] = newStart; } + rowPtr[i] = newStart; } - rowPtr[nRows] = count; } - - template - void VariableDofLaplacianFactory::buildPaddedMap(const Teuchos::ArrayRCP & dofPresent, std::vector & map, size_t nDofs) const { - size_t count = 0; - for (decltype(dofPresent.size()) i = 0; i < dofPresent.size(); i++) - if(dofPresent[i] == 1) map[count++] = Teuchos::as(i); - TEUCHOS_TEST_FOR_EXCEPTION(nDofs != count, MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory::buildPaddedMap: #dofs in dofPresent does not match the expected value (number of rows of A): 
" << nDofs << " vs. " << count); + rowPtr[nRows] = count; +} + +template +void VariableDofLaplacianFactory::buildPaddedMap(const Teuchos::ArrayRCP& dofPresent, std::vector& map, size_t nDofs) const { + size_t count = 0; + for (decltype(dofPresent.size()) i = 0; i < dofPresent.size(); i++) + if (dofPresent[i] == 1) map[count++] = Teuchos::as(i); + TEUCHOS_TEST_FOR_EXCEPTION(nDofs != count, MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory::buildPaddedMap: #dofs in dofPresent does not match the expected value (number of rows of A): " << nDofs << " vs. " << count); +} + +template +void VariableDofLaplacianFactory::assignGhostLocalNodeIds(const Teuchos::RCP& rowDofMap, const Teuchos::RCP& colDofMap, std::vector& myLocalNodeIds, const std::vector& dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP > comm) const { + size_t nLocalDofs = rowDofMap->getLocalNumElements(); + size_t nLocalPlusGhostDofs = colDofMap->getLocalNumElements(); // TODO remove parameters + + // create importer for dof-based information + Teuchos::RCP importer = ImportFactory::Build(rowDofMap, colDofMap); + + // create a vector living on column map of A (dof based) + Teuchos::RCP localNodeIdsTemp = LOVectorFactory::Build(rowDofMap, true); + Teuchos::RCP localNodeIds = LOVectorFactory::Build(colDofMap, true); + + // fill local dofs (padded local ids) + { + Teuchos::ArrayRCP localNodeIdsTempData = localNodeIdsTemp->getDataNonConst(0); + for (size_t i = 0; i < localNodeIdsTemp->getLocalLength(); i++) + localNodeIdsTempData[i] = std::floor(dofMap[i] / maxDofPerNode); } - template - void VariableDofLaplacianFactory::assignGhostLocalNodeIds(const Teuchos::RCP & rowDofMap, const Teuchos::RCP & colDofMap, std::vector & myLocalNodeIds, const std::vector & dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP< const Teuchos::Comm< int > > comm) const { + localNodeIds->doImport(*localNodeIdsTemp, *importer, 
Xpetra::INSERT); + Teuchos::ArrayRCP localNodeIdsData = localNodeIds->getData(0); - size_t nLocalDofs = rowDofMap->getLocalNumElements(); - size_t nLocalPlusGhostDofs = colDofMap->getLocalNumElements(); // TODO remove parameters + // Note: localNodeIds contains local ids for the padded version as vector values - // create importer for dof-based information - Teuchos::RCP importer = ImportFactory::Build(rowDofMap, colDofMap); + // we use Scalar instead of int as type + Teuchos::RCP myProcTemp = LOVectorFactory::Build(rowDofMap, true); + Teuchos::RCP myProc = LOVectorFactory::Build(colDofMap, true); - // create a vector living on column map of A (dof based) - Teuchos::RCP localNodeIdsTemp = LOVectorFactory::Build(rowDofMap,true); - Teuchos::RCP localNodeIds = LOVectorFactory::Build(colDofMap,true); - - // fill local dofs (padded local ids) - { - Teuchos::ArrayRCP< LocalOrdinal > localNodeIdsTempData = localNodeIdsTemp->getDataNonConst(0); - for(size_t i = 0; i < localNodeIdsTemp->getLocalLength(); i++) - localNodeIdsTempData[i] = std::floor( dofMap[i] / maxDofPerNode ); - } - - localNodeIds->doImport(*localNodeIdsTemp, *importer, Xpetra::INSERT); - Teuchos::ArrayRCP< const LocalOrdinal > localNodeIdsData = localNodeIds->getData(0); - - // Note: localNodeIds contains local ids for the padded version as vector values - - - // we use Scalar instead of int as type - Teuchos::RCP myProcTemp = LOVectorFactory::Build(rowDofMap,true); - Teuchos::RCP myProc = LOVectorFactory::Build(colDofMap,true); - - // fill local dofs (padded local ids) - { - Teuchos::ArrayRCP< LocalOrdinal > myProcTempData = myProcTemp->getDataNonConst(0); - for(size_t i = 0; i < myProcTemp->getLocalLength(); i++) - myProcTempData[i] = Teuchos::as(comm->getRank()); - } - myProc->doImport(*myProcTemp, *importer, Xpetra::INSERT); - Teuchos::ArrayRCP myProcData = myProc->getDataNonConst(0); // we have to modify the data (therefore the non-const version) - - // At this point, the ghost part of localNodeIds 
corresponds to the local ids - // associated with the current owning processor. We want to convert these to - // local ids associated with the processor on which these are ghosts. - // Thus we have to re-number them. In doing this re-numbering we must make sure - // that we find all ghosts with the same id & proc and assign a unique local - // id to this group (id&proc). To do this find, we sort all ghost entries in - // localNodeIds that are owned by the same processor. Then we can look for - // duplicates (i.e., several ghost entries corresponding to dofs with the same - // node id) easily and make sure these are all assigned to the same local id. - // To do the sorting we'll make a temporary copy of the ghosts via tempId and - // tempProc and sort this multiple times for each group owned by the same proc. - - - std::vector location(nLocalPlusGhostDofs - nLocalDofs + 1); - std::vector tempId (nLocalPlusGhostDofs - nLocalDofs + 1); - std::vector tempProc(nLocalPlusGhostDofs - nLocalDofs + 1); - - size_t notProcessed = nLocalDofs; // iteration index over all ghosted dofs - size_t tempIndex = 0; - size_t first = tempIndex; - LocalOrdinal neighbor; - - while (notProcessed < nLocalPlusGhostDofs) { - neighbor = myProcData[notProcessed]; // get processor id of not-processed element - first = tempIndex; - location[tempIndex] = notProcessed; - tempId[tempIndex++] = localNodeIdsData[notProcessed]; - myProcData[notProcessed] = -1 - neighbor; - - for(size_t i = notProcessed + 1; i < nLocalPlusGhostDofs; i++) { - if(myProcData[i] == neighbor) { - location[tempIndex] = i; - tempId[tempIndex++] = localNodeIdsData[i]; - myProcData[i] = -1; // mark as visited - } + // fill local dofs (padded local ids) + { + Teuchos::ArrayRCP myProcTempData = myProcTemp->getDataNonConst(0); + for (size_t i = 0; i < myProcTemp->getLocalLength(); i++) + myProcTempData[i] = Teuchos::as(comm->getRank()); + } + myProc->doImport(*myProcTemp, *importer, Xpetra::INSERT); + Teuchos::ArrayRCP myProcData = 
myProc->getDataNonConst(0); // we have to modify the data (therefore the non-const version) + + // At this point, the ghost part of localNodeIds corresponds to the local ids + // associated with the current owning processor. We want to convert these to + // local ids associated with the processor on which these are ghosts. + // Thus we have to re-number them. In doing this re-numbering we must make sure + // that we find all ghosts with the same id & proc and assign a unique local + // id to this group (id&proc). To do this find, we sort all ghost entries in + // localNodeIds that are owned by the same processor. Then we can look for + // duplicates (i.e., several ghost entries corresponding to dofs with the same + // node id) easily and make sure these are all assigned to the same local id. + // To do the sorting we'll make a temporary copy of the ghosts via tempId and + // tempProc and sort this multiple times for each group owned by the same proc. + + std::vector location(nLocalPlusGhostDofs - nLocalDofs + 1); + std::vector tempId(nLocalPlusGhostDofs - nLocalDofs + 1); + std::vector tempProc(nLocalPlusGhostDofs - nLocalDofs + 1); + + size_t notProcessed = nLocalDofs; // iteration index over all ghosted dofs + size_t tempIndex = 0; + size_t first = tempIndex; + LocalOrdinal neighbor; + + while (notProcessed < nLocalPlusGhostDofs) { + neighbor = myProcData[notProcessed]; // get processor id of not-processed element + first = tempIndex; + location[tempIndex] = notProcessed; + tempId[tempIndex++] = localNodeIdsData[notProcessed]; + myProcData[notProcessed] = -1 - neighbor; + + for (size_t i = notProcessed + 1; i < nLocalPlusGhostDofs; i++) { + if (myProcData[i] == neighbor) { + location[tempIndex] = i; + tempId[tempIndex++] = localNodeIdsData[i]; + myProcData[i] = -1; // mark as visited } - this->MueLu_az_sort(&(tempId[first]), tempIndex - first, &(location[first]), NULL); - for(size_t i = first; i < tempIndex; i++) tempProc[i] = neighbor; - - // increment index. 
Find next notProcessed dof index corresponding to first non-visited element - notProcessed++; - while ( (notProcessed < nLocalPlusGhostDofs) && (myProcData[notProcessed] < 0)) - notProcessed++; } - TEUCHOS_TEST_FOR_EXCEPTION(tempIndex != nLocalPlusGhostDofs-nLocalDofs, MueLu::Exceptions::RuntimeError,"Number of nonzero ghosts is inconsistent."); - - // Now assign ids to all ghost nodes (giving the same id to those with the - // same myProc[] and the same local id on the proc that actually owns the - // variable associated with the ghost - - nLocalNodes = 0; // initialize return value - if(nLocalDofs > 0) nLocalNodes = localNodeIdsData[nLocalDofs-1] + 1; + this->MueLu_az_sort(&(tempId[first]), tempIndex - first, &(location[first]), NULL); + for (size_t i = first; i < tempIndex; i++) tempProc[i] = neighbor; - nLocalPlusGhostNodes = nLocalNodes; // initialize return value - if(nLocalDofs < nLocalPlusGhostDofs) nLocalPlusGhostNodes++; // 1st ghost node is unique (not accounted for). number will be increased later, if there are more ghost nodes + // increment index. Find next notProcessed dof index corresponding to first non-visited element + notProcessed++; + while ((notProcessed < nLocalPlusGhostDofs) && (myProcData[notProcessed] < 0)) + notProcessed++; + } + TEUCHOS_TEST_FOR_EXCEPTION(tempIndex != nLocalPlusGhostDofs - nLocalDofs, MueLu::Exceptions::RuntimeError, "Number of nonzero ghosts is inconsistent."); - // check if two adjacent ghost dofs correspond to different nodes. 
To do this, - // check if they are from different processors or whether they have different - // local node ids + // Now assign ids to all ghost nodes (giving the same id to those with the + // same myProc[] and the same local id on the proc that actually owns the + // variable associated with the ghost - // loop over all (remaining) ghost dofs - for (size_t i = nLocalDofs+1; i < nLocalPlusGhostDofs; i++) { - size_t lagged = nLocalPlusGhostNodes-1; + nLocalNodes = 0; // initialize return value + if (nLocalDofs > 0) nLocalNodes = localNodeIdsData[nLocalDofs - 1] + 1; - // i is a new unique ghost node (not already accounted for) - if ((tempId[i-nLocalDofs] != tempId[i-1-nLocalDofs]) || - (tempProc[i-nLocalDofs] != tempProc[i-1-nLocalDofs])) - nLocalPlusGhostNodes++; // update number of ghost nodes - tempId[i-1-nLocalDofs] = lagged; - } - if (nLocalPlusGhostDofs > nLocalDofs) - tempId[nLocalPlusGhostDofs-1-nLocalDofs] = nLocalPlusGhostNodes - 1; + nLocalPlusGhostNodes = nLocalNodes; // initialize return value + if (nLocalDofs < nLocalPlusGhostDofs) nLocalPlusGhostNodes++; // 1st ghost node is unique (not accounted for). number will be increased later, if there are more ghost nodes - // fill myLocalNodeIds array. Start with local part (not ghosted) - for(size_t i = 0; i < nLocalDofs; i++) - myLocalNodeIds[i] = std::floor( dofMap[i] / maxDofPerNode ); + // check if two adjacent ghost dofs correspond to different nodes. 
To do this, + // check if they are from different processors or whether they have different + // local node ids - // copy ghosted nodal ids into myLocalNodeIds - for(size_t i = nLocalDofs; i < nLocalPlusGhostDofs; i++) - myLocalNodeIds[location[i-nLocalDofs]] = tempId[i-nLocalDofs]; + // loop over all (remaining) ghost dofs + for (size_t i = nLocalDofs + 1; i < nLocalPlusGhostDofs; i++) { + size_t lagged = nLocalPlusGhostNodes - 1; + // i is a new unique ghost node (not already accounted for) + if ((tempId[i - nLocalDofs] != tempId[i - 1 - nLocalDofs]) || + (tempProc[i - nLocalDofs] != tempProc[i - 1 - nLocalDofs])) + nLocalPlusGhostNodes++; // update number of ghost nodes + tempId[i - 1 - nLocalDofs] = lagged; } + if (nLocalPlusGhostDofs > nLocalDofs) + tempId[nLocalPlusGhostDofs - 1 - nLocalDofs] = nLocalPlusGhostNodes - 1; + + // fill myLocalNodeIds array. Start with local part (not ghosted) + for (size_t i = 0; i < nLocalDofs; i++) + myLocalNodeIds[i] = std::floor(dofMap[i] / maxDofPerNode); -} /* MueLu */ + // copy ghosted nodal ids into myLocalNodeIds + for (size_t i = nLocalDofs; i < nLocalPlusGhostDofs; i++) + myLocalNodeIds[location[i - nLocalDofs]] = tempId[i - nLocalDofs]; +} +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp index 8a78e56cd9ca..3ab43d690931 100644 --- a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp +++ b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp @@ -57,42 +57,40 @@ namespace MueLu { - /*! - @class AggregationAlgorithmBase - @brief Pure virtual base class for all MueLu aggregation algorithms +/*! 
+ @class AggregationAlgorithmBase + @brief Pure virtual base class for all MueLu aggregation algorithms - @ingroup MueLuBaseClasses - */ - template - class AggregationAlgorithmBase : public BaseClass { + @ingroup MueLuBaseClasses + */ +template +class AggregationAlgorithmBase : public BaseClass { #undef MUELU_AGGREGATIONALGORITHMBASE_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + //! @name Constructors/Destructors + //@{ - //! @name Constructors/Destructors - //@{ + //! Destructor. + virtual ~AggregationAlgorithmBase() {} - //! Destructor. - virtual ~AggregationAlgorithmBase() {} + //@} - //@} + //! @name Build routines + //@{ - //! @name Build routines - //@{ + //! BuildAggregates routine. + virtual void BuildAggregates(const Teuchos::ParameterList& params, + const GraphBase& graph, + Aggregates& aggregates, + std::vector& aggStat, + LO& numNonAggregatedNodes) const = 0; + //@} +}; - //! BuildAggregates routine. - virtual void BuildAggregates(const Teuchos::ParameterList& params, - const GraphBase& graph, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes) const = 0; - //@} - - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONALGORITHMBASE_SHORT #endif /* MUELU_AGGREGATIONALGORITHMBASE_HPP_ */ diff --git a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp index e758bdf1c84c..b79f6cf8fd9c 100644 --- a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp +++ b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp @@ -59,42 +59,42 @@ namespace MueLu { - /*! - @class AggregationAlgorithmBase - @brief Pure virtual base class for all MueLu aggregation algorithms +/*! 
+ @class AggregationAlgorithmBase + @brief Pure virtual base class for all MueLu aggregation algorithms - @ingroup MueLuBaseClasses - */ - template - class AggregationAlgorithmBase_kokkos : public BaseClass { + @ingroup MueLuBaseClasses + */ +template +class AggregationAlgorithmBase_kokkos : public BaseClass { #undef MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; + public: + using device_type = typename LWGraph_kokkos::device_type; - //! @name Constructors/Destructors - //@{ + //! @name Constructors/Destructors + //@{ - //! Destructor. - virtual ~AggregationAlgorithmBase_kokkos() {} + //! Destructor. + virtual ~AggregationAlgorithmBase_kokkos() {} - //@} + //@} - //! @name Build routines - //@{ + //! @name Build routines + //@{ - //! BuildAggregates routine. - virtual void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const = 0; - //@} - }; + //! BuildAggregates routine. 
+ virtual void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const = 0; + //@} +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_HPP +#endif // MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_HPP diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp index ccdf3e3a7782..7252edd8ea03 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp @@ -48,11 +48,9 @@ #include "MueLu_ConfigDefs.hpp" - #include #include - #include #include "MueLu_GraphBase_fwd.hpp" @@ -67,15 +65,15 @@ namespace MueLu { -template +template class NotayAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_NOTAYAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" -public: + public: //! @name typedefs //@{ using local_matrix_type = typename Matrix::local_matrix_type; @@ -86,15 +84,14 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { using row_sum_type = typename Kokkos::View; //@} - //! @name Constructors/Destructors. //@{ //! Constructor. - NotayAggregationFactory() { }; + NotayAggregationFactory(){}; //! Destructor. - virtual ~NotayAggregationFactory() { } + virtual ~NotayAggregationFactory() {} RCP GetValidParameterList() const; @@ -108,7 +105,7 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { //! Input //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; //@} @@ -116,7 +113,7 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { //@{ /*! @brief Build aggregates. 
*/ - void Build(Level ¤tLevel) const; + void Build(Level& currentLevel) const; /*! @brief Initial aggregation phase. */ void BuildInitialAggregates(const Teuchos::ParameterList& params, @@ -131,7 +128,7 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { /*! @brief Further aggregation phase increases coarsening rate by a factor of ~2 per iteration. */ void BuildFurtherAggregates(const Teuchos::ParameterList& params, const RCP& A, - const Teuchos::ArrayView & orderingVector, + const Teuchos::ArrayView& orderingVector, const local_matrix_type& coarseA, const magnitude_type kappa, const row_sum_type& rowSum, @@ -160,13 +157,12 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { const std::string matrixLabel, local_matrix_type& C) const; - //@} -private: -}; // class NotayAggregationFactory + private: +}; // class NotayAggregationFactory -} +} // namespace MueLu #define MUELU_NOTAYAGGREGATIONFACTORY_SHORT #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp index b84022432e41..60949d3ad932 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp @@ -66,880 +66,865 @@ #include "MueLu_Types.hpp" #include "MueLu_Utilities.hpp" - namespace MueLu { - namespace NotayUtils { - template - LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max) { - return min + as((max-min+1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); - } - - template - void RandomReorder(Teuchos::Array & list) { - typedef LocalOrdinal LO; - LO n = Teuchos::as(list.size()); - for(LO i = 0; i < n-1; i++) - std::swap(list[i], list[RandomOrdinal(i,n-1)]); - } - } - - template - RCP NotayAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new 
ParameterList()); - +namespace NotayUtils { +template +LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max) { + return min + as((max - min + 1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); +} + +template +void RandomReorder(Teuchos::Array& list) { + typedef LocalOrdinal LO; + LO n = Teuchos::as(list.size()); + for (LO i = 0; i < n - 1; i++) + std::swap(list[i], list[RandomOrdinal(i, n - 1)]); +} +} // namespace NotayUtils + +template +RCP NotayAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: pairwise: size"); - SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: ordering"); + SET_VALID_ENTRY("aggregation: pairwise: size"); + SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); + SET_VALID_ENTRY("aggregation: compute aggregate qualities"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: ordering"); #undef SET_VALID_ENTRY - // general variables needed in AggregationFactory - validParamList->set< RCP >("A", null, "Generating factory of the matrix"); - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set< RCP >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + // general variables needed in AggregationFactory + validParamList->set >("A", null, "Generating factory of the matrix"); + validParamList->set >("Graph", null, "Generating factory of the graph"); + validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for 
\'Graph\'"); + validParamList->set >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + return validParamList; +} - return validParamList; - } - - template - void NotayAggregationFactory::DeclareInput(Level& currentLevel) const { - const ParameterList& pL = GetParameterList(); - - Input(currentLevel, "A"); - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); - } - +template +void NotayAggregationFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + Input(currentLevel, "A"); + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + if (pL.get("aggregation: compute aggregate qualities")) { + Input(currentLevel, "AggregateQualities"); + } +} + +template +void NotayAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + using STS = Teuchos::ScalarTraits; + using MT = typename STS::magnitudeType; + + const MT MT_TWO = Teuchos::ScalarTraits::one() + Teuchos::ScalarTraits::one(); + + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } + const ParameterList& pL = GetParameterList(); + + const MT kappa = static_cast(pL.get("aggregation: Dirichlet threshold")); + TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO, + Exceptions::RuntimeError, + "Pairwise requires kappa > 2" + " otherwise all rows are considered as Dirichlet rows."); + + // Parameters + int maxNumIter = 3; + if (pL.isParameter("aggregation: pairwise: size")) + maxNumIter = pL.get("aggregation: pairwise: size"); + TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1, + Exceptions::RuntimeError, + 
"NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" + " must be a strictly positive integer"); + + RCP graph = Get >(currentLevel, "Graph"); + RCP A = Get >(currentLevel, "A"); + + // Setup aggregates & aggStat objects + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("PW"); + + const LO numRows = graph->GetNodeNumVertices(); + + // construct aggStat information + std::vector aggStat(numRows, READY); + + const int DofsPerNode = Get(currentLevel, "DofsPerNode"); + TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError, + "Pairwise only supports one dof per node"); + + // This follows the paper: + // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations", + // SISC 34(3), pp. A2288-2316. + + // Handle Ordering + std::string orderingStr = pL.get("aggregation: ordering"); + enum { + O_NATURAL, + O_RANDOM, + O_CUTHILL_MCKEE, + } ordering; + + ordering = O_NATURAL; + if (orderingStr == "random") + ordering = O_RANDOM; + else if (orderingStr == "natural") { + } else if (orderingStr == "cuthill-mckee" || orderingStr == "cm") + ordering = O_CUTHILL_MCKEE; + else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Invalid ordering type"); + } - template - void NotayAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - using STS = Teuchos::ScalarTraits; - using MT = typename STS::magnitudeType; - - const MT MT_TWO = Teuchos::ScalarTraits::one() + Teuchos::ScalarTraits::one(); - - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - const ParameterList& pL = GetParameterList(); - - const MT kappa = static_cast(pL.get("aggregation: Dirichlet threshold")); - TEUCHOS_TEST_FOR_EXCEPTION(kappa <= 
MT_TWO, - Exceptions::RuntimeError, - "Pairwise requires kappa > 2" - " otherwise all rows are considered as Dirichlet rows."); - - // Parameters - int maxNumIter = 3; - if (pL.isParameter("aggregation: pairwise: size")) - maxNumIter = pL.get("aggregation: pairwise: size"); - TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1, - Exceptions::RuntimeError, - "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" - " must be a strictly positive integer"); - - - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP A = Get< RCP >(currentLevel, "A"); - - // Setup aggregates & aggStat objects - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("PW"); - - const LO numRows = graph->GetNodeNumVertices(); - - // construct aggStat information - std::vector aggStat(numRows, READY); - - - const int DofsPerNode = Get(currentLevel,"DofsPerNode"); - TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError, - "Pairwise only supports one dof per node"); + // Get an ordering vector + // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns + // will get ignored in the aggregation phases, so we don't need to worry about + // running off the end. + Array orderingVector(numRows); + for (LO i = 0; i < numRows; i++) + orderingVector[i] = i; + if (ordering == O_RANDOM) + MueLu::NotayUtils::RandomReorder(orderingVector); + else if (ordering == O_CUTHILL_MCKEE) { + RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); + for (LO i = 0; i < numRows; i++) + orderingVector[i] = localVector[i]; + } - // This follows the paper: - // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations", - // SISC 34(3), pp. A2288-2316. 
+ // Get the party stated + LO numNonAggregatedNodes = numRows, numDirichletNodes = 0; + BuildInitialAggregates(pL, A, orderingVector(), kappa, + *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes); + TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError, + "Initial pairwise aggregation failed to aggregate all nodes"); + LO numLocalAggregates = aggregates->GetNumAggregates(); + GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - " + << A->getLocalNumRows() / numLocalAggregates << std::endl; + + // Temporary data storage for further aggregation steps + local_matrix_type intermediateP; + local_matrix_type coarseLocalA; + + // Compute the on rank part of the local matrix + // that the square submatrix that only contains + // columns corresponding to local rows. + LO numLocalDirichletNodes = numDirichletNodes; + Array localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows)); + BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA); + for (LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) { + // Compute the intermediate prolongator + BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates, + localVertex2AggId(), intermediateP); + + // Compute the coarse local matrix and coarse row sum + BuildCoarseLocalMatrix(intermediateP, coarseLocalA); + + // Directly compute rowsum from A, rather than coarseA + row_sum_type rowSum("rowSum", numLocalAggregates); + { + std::vector > agg2vertex(numLocalAggregates); + auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); + for (LO i = 0; i < (LO)numRows; i++) { + if (aggStat[i] != AGGREGATED) + continue; + LO agg = vertex2AggId[i]; + agg2vertex[agg].push_back(i); + } - // Handle Ordering - std::string orderingStr = pL.get("aggregation: ordering"); - enum { - O_NATURAL, - O_RANDOM, - O_CUTHILL_MCKEE, - } ordering; + typename row_sum_type::HostMirror rowSum_h = 
Kokkos::create_mirror_view(rowSum); + for (LO i = 0; i < numRows; i++) { + // If not aggregated already, skip this guy + if (aggStat[i] != AGGREGATED) + continue; + int agg = vertex2AggId[i]; + std::vector& myagg = agg2vertex[agg]; + + size_t nnz = A->getNumEntriesInLocalRow(i); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(i, indices, vals); + + SC mysum = Teuchos::ScalarTraits::zero(); + for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { + bool found = false; + if (indices[colidx] < numRows) { + for (LO j = 0; j < (LO)myagg.size(); j++) + if (vertex2AggId[indices[colidx]] == agg) + found = true; + } + if (!found) { + *out << "- ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl; + mysum += vals[colidx]; + } else { + *out << "- NOT ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl; + } + } - ordering = O_NATURAL; - if (orderingStr == "random" ) ordering = O_RANDOM; - else if(orderingStr == "natural") {} - else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE; - else { - TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type"); + rowSum_h[agg] = mysum; + } + Kokkos::deep_copy(rowSum, rowSum_h); } - // Get an ordering vector - // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns - // will get ignored in the aggregation phases, so we don't need to worry about - // running off the end. 
- Array orderingVector(numRows); + // Get local orderingVector + Array localOrderingVector(numRows); for (LO i = 0; i < numRows; i++) - orderingVector[i] = i; + localOrderingVector[i] = i; if (ordering == O_RANDOM) - MueLu::NotayUtils::RandomReorder(orderingVector); + MueLu::NotayUtils::RandomReorder(localOrderingVector); else if (ordering == O_CUTHILL_MCKEE) { - RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); - auto localVector = rcmVector->getData(0); + RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); for (LO i = 0; i < numRows; i++) - orderingVector[i] = localVector[i]; + localOrderingVector[i] = localVector[i]; } - // Get the party stated - LO numNonAggregatedNodes = numRows, numDirichletNodes = 0; - BuildInitialAggregates(pL, A, orderingVector(), kappa, - *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes); - TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError, - "Initial pairwise aggregation failed to aggregate all nodes"); - LO numLocalAggregates = aggregates->GetNumAggregates(); - GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - " - << A->getLocalNumRows() / numLocalAggregates << std::endl; - - // Temporary data storage for further aggregation steps - local_matrix_type intermediateP; - local_matrix_type coarseLocalA; - - // Compute the on rank part of the local matrix - // that the square submatrix that only contains - // columns corresponding to local rows. 
- LO numLocalDirichletNodes = numDirichletNodes; - Array localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows)); - BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA); - for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) { - // Compute the intermediate prolongator - BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates, - localVertex2AggId(), intermediateP); - - // Compute the coarse local matrix and coarse row sum - BuildCoarseLocalMatrix(intermediateP, coarseLocalA); - - // Directly compute rowsum from A, rather than coarseA - row_sum_type rowSum("rowSum", numLocalAggregates); - { - std::vector > agg2vertex(numLocalAggregates); - auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); - for(LO i=0; i<(LO)numRows; i++) { - if(aggStat[i] != AGGREGATED) - continue; - LO agg=vertex2AggId[i]; - agg2vertex[agg].push_back(i); - } - - typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); - for(LO i = 0; i < numRows; i++) { - // If not aggregated already, skip this guy - if(aggStat[i] != AGGREGATED) - continue; - int agg = vertex2AggId[i]; - std::vector & myagg = agg2vertex[agg]; - - size_t nnz = A->getNumEntriesInLocalRow(i); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(i, indices, vals); - - SC mysum = Teuchos::ScalarTraits::zero(); - for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { - bool found = false; - if(indices[colidx] < numRows) { - for(LO j=0; j<(LO)myagg.size(); j++) - if (vertex2AggId[indices[colidx]] == agg) - found=true; - } - if(!found) { - *out << "- ADDING col "<getLocalNumRows() / numLocalAggregates << std::endl; } - aggregates->SetNumAggregates(numLocalAggregates); - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - // DO stuff - Set(currentLevel, "Aggregates", aggregates); - GetOStream(Statistics0) << aggregates->description() << 
std::endl; + // We could probably print some better statistics at some point + GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - " + << A->getLocalNumRows() / numLocalAggregates << std::endl; + } + aggregates->SetNumAggregates(numLocalAggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + + // DO stuff + Set(currentLevel, "Aggregates", aggregates); + GetOStream(Statistics0) << aggregates->description() << std::endl; +} + +template +void NotayAggregationFactory:: + BuildInitialAggregates(const Teuchos::ParameterList& params, + const RCP& A, + const Teuchos::ArrayView& orderingVector, + const typename Teuchos::ScalarTraits::magnitudeType kappa, + Aggregates& aggregates, + std::vector& aggStat, + LO& numNonAggregatedNodes, + LO& numDirichletNodes) const { + Monitor m(*this, "BuildInitialAggregates"); + using STS = Teuchos::ScalarTraits; + using MT = typename STS::magnitudeType; + using RealValuedVector = Xpetra::Vector; + + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - - template - void NotayAggregationFactory:: - BuildInitialAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const typename Teuchos::ScalarTraits::magnitudeType kappa, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes, - LO& numDirichletNodes) const { - - Monitor m(*this, "BuildInitialAggregates"); - using STS = Teuchos::ScalarTraits; - using MT = typename STS::magnitudeType; - using RealValuedVector = Xpetra::Vector; - - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - 
out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - - const SC SC_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ONE = Teuchos::ScalarTraits::one(); - const MT MT_TWO = MT_ONE + MT_ONE; - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - - const MT kappa_init = kappa / (kappa - MT_TWO); - const LO numRows = aggStat.size(); - const int myRank = A->getMap()->getComm()->getRank(); - - // For finding "ties" where we fall back to the ordering. Making this larger than - // hard zero substantially increases code robustness. - double tie_criterion = params.get("aggregation: pairwise: tie threshold"); - double tie_less = 1.0 - tie_criterion; - double tie_more = 1.0 + tie_criterion; - - // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(), - // and so we're not doing again here. - // This should probably be fixed at some point. 
- - // Extract diagonal, rowsums, etc - // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - RCP ghostedRowSum = MueLu::Utilities::GetMatrixOverlappedDeletedRowsum(*A); - RCP ghostedAbsRowSum = MueLu::Utilities::GetMatrixOverlappedAbsDeletedRowsum(*A); - const ArrayRCP D = ghostedDiag->getData(0); - const ArrayRCP S = ghostedRowSum->getData(0); - const ArrayRCP AbsRs = ghostedAbsRowSum->getData(0); - - // Aggregates stuff - ArrayRCP vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0); - ArrayView vertex2AggId = vertex2AggId_rcp(); - ArrayView procWinner = procWinner_rcp(); - - // Algorithm 4.2 - - // 0,1 : Initialize: Flag boundary conditions - // Modification: We assume symmetry here aij = aji - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - MT aii = STS::magnitude(D[row]); - MT rowsum = AbsRs[row]; - - if(aii >= kappa_init * rowsum) { - *out << "Flagging index " << row << " as dirichlet " - "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl; - aggStat[row] = IGNORED; - --numNonAggregatedNodes; - ++numDirichletNodes; - } + const SC SC_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ONE = Teuchos::ScalarTraits::one(); + const MT MT_TWO = MT_ONE + MT_ONE; + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + + const MT kappa_init = kappa / (kappa - MT_TWO); + const LO numRows = aggStat.size(); + const int myRank = A->getMap()->getComm()->getRank(); + + // For finding "ties" where we fall back to the ordering. Making this larger than + // hard zero substantially increases code robustness. 
+ double tie_criterion = params.get("aggregation: pairwise: tie threshold"); + double tie_less = 1.0 - tie_criterion; + double tie_more = 1.0 + tie_criterion; + + // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(), + // and so we're not doing again here. + // This should probably be fixed at some point. + + // Extract diagonal, rowsums, etc + // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S + RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + RCP ghostedRowSum = MueLu::Utilities::GetMatrixOverlappedDeletedRowsum(*A); + RCP ghostedAbsRowSum = MueLu::Utilities::GetMatrixOverlappedAbsDeletedRowsum(*A); + const ArrayRCP D = ghostedDiag->getData(0); + const ArrayRCP S = ghostedRowSum->getData(0); + const ArrayRCP AbsRs = ghostedAbsRowSum->getData(0); + + // Aggregates stuff + ArrayRCP vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner_rcp = aggregates.GetProcWinner()->getDataNonConst(0); + ArrayView vertex2AggId = vertex2AggId_rcp(); + ArrayView procWinner = procWinner_rcp(); + + // Algorithm 4.2 + + // 0,1 : Initialize: Flag boundary conditions + // Modification: We assume symmetry here aij = aji + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + MT aii = STS::magnitude(D[row]); + MT rowsum = AbsRs[row]; + + if (aii >= kappa_init * rowsum) { + *out << "Flagging index " << row << " as dirichlet " + "aii >= kappa*rowsum = " + << aii << " >= " << kappa_init << " " << rowsum << std::endl; + aggStat[row] = IGNORED; + --numNonAggregatedNodes; + ++numDirichletNodes; } + } - - // 2 : Iteration - LO aggIndex = LO_ZERO; - for(LO i = 0; i < numRows; i++) { - LO current_idx = orderingVector[i]; - // If we're aggregated already, skip this guy - if(aggStat[current_idx] != READY) + // 2 : Iteration + LO aggIndex = LO_ZERO; + for (LO i = 0; i < numRows; i++) { + LO current_idx = orderingVector[i]; + // If we're aggregated already, skip 
this guy + if (aggStat[current_idx] != READY) + continue; + + MT best_mu = MT_ZERO; + LO best_idx = LO_INVALID; + + size_t nnz = A->getNumEntriesInLocalRow(current_idx); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(current_idx, indices, vals); + + MT aii = STS::real(D[current_idx]); + MT si = STS::real(S[current_idx]); + for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { + // Skip aggregated neighbors, off-rank neighbors, hard zeros and self + LO col = indices[colidx]; + SC val = vals[colidx]; + if (current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO) continue; - MT best_mu = MT_ZERO; - LO best_idx = LO_INVALID; - - size_t nnz = A->getNumEntriesInLocalRow(current_idx); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(current_idx, indices, vals); - - MT aii = STS::real(D[current_idx]); - MT si = STS::real(S[current_idx]); - for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { - // Skip aggregated neighbors, off-rank neighbors, hard zeros and self - LO col = indices[colidx]; - SC val = vals[colidx]; - if(current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO) - continue; - - MT aij = STS::real(val); - MT ajj = STS::real(D[col]); - MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - if(aii - si + ajj - sj >= MT_ZERO) { - // Modification: We assume symmetry here aij = aji - MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj); - MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) ); - MT mu = mu_top / mu_bottom; - - // Modification: Explicitly check the tie criterion here - if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less || - (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) { - best_mu = mu; - best_idx = col; - *out << "[" << current_idx << "] Column UPDATED " << col << ": " - << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - 
" << sj - << " = " << aii - si + ajj - sj<< ", aij = "<= MT_ZERO) { + // Modification: We assume symmetry here aij = aji + MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj); + MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj)); + MT mu = mu_top / mu_bottom; + + // Modification: Explicitly check the tie criterion here + if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less || + (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) { + best_mu = mu; + best_idx = col; + *out << "[" << current_idx << "] Column UPDATED " << col << ": " << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj - << " = " << aii - si + ajj - sj << ", aij = "<(vertex2AggId.size()); ++i) { - *out << i << "(" << vertex2AggId[i] << ")"; - } - *out << std::endl; - - // update aggregate object - aggregates.SetNumAggregates(aggIndex); - } // BuildInitialAggregates - - template - void NotayAggregationFactory:: - BuildFurtherAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const typename Matrix::local_matrix_type& coarseA, - const typename Teuchos::ScalarTraits::magnitudeType kappa, - const Kokkos::View::val_type*, - Kokkos::LayoutLeft, - typename Matrix::local_matrix_type::device_type>& rowSum, - std::vector& localAggStat, - Teuchos::Array& localVertex2AggID, - LO& numLocalAggregates, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildFurtherAggregates"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + aggStat[current_idx] = ONEPT; + vertex2AggId[current_idx] = aggIndex; + procWinner[current_idx] = myRank; + numNonAggregatedNodes--; + 
aggIndex++; + } // best_mu + } // best_idx + } // end Algorithm 4.2 - using value_type = typename local_matrix_type::value_type; - const value_type KAT_zero = Kokkos::ArithTraits::zero(); - const magnitude_type MT_zero = Teuchos::ScalarTraits::zero(); - const magnitude_type MT_one = Teuchos::ScalarTraits::one(); - const magnitude_type MT_two = MT_one + MT_one; - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid() ; - - // For finding "ties" where we fall back to the ordering. Making this larger than - // hard zero substantially increases code robustness. - double tie_criterion = params.get("aggregation: pairwise: tie threshold"); - double tie_less = 1.0 - tie_criterion; - double tie_more = 1.0 + tie_criterion; - - typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); - Kokkos::deep_copy(rowSum_h, rowSum); - - // Extracting the diagonal of a KokkosSparse::CrsMatrix - // is not currently provided in kokkos-kernels so here - // is an ugly way to get that done... 
- const LO numRows = static_cast(coarseA.numRows()); - typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows); - typename local_matrix_type::row_map_type::HostMirror row_map_h - = Kokkos::create_mirror_view(coarseA.graph.row_map); - Kokkos::deep_copy(row_map_h, coarseA.graph.row_map); - typename local_matrix_type::index_type::HostMirror entries_h - = Kokkos::create_mirror_view(coarseA.graph.entries); - Kokkos::deep_copy(entries_h, coarseA.graph.entries); - typename local_matrix_type::values_type::HostMirror values_h - = Kokkos::create_mirror_view(coarseA.values); - Kokkos::deep_copy(values_h, coarseA.values); - for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) { - for(LO entryIdx = static_cast(row_map_h(rowIdx)); - entryIdx < static_cast(row_map_h(rowIdx + 1)); - ++entryIdx) { - if(rowIdx == static_cast(entries_h(entryIdx))) { - diagA_h(rowIdx) = values_h(entryIdx); - } + *out << "vertex2aggid :"; + for (int i = 0; i < static_cast(vertex2AggId.size()); ++i) { + *out << i << "(" << vertex2AggId[i] << ")"; + } + *out << std::endl; + + // update aggregate object + aggregates.SetNumAggregates(aggIndex); +} // BuildInitialAggregates + +template +void NotayAggregationFactory:: + BuildFurtherAggregates(const Teuchos::ParameterList& params, + const RCP& A, + const Teuchos::ArrayView& orderingVector, + const typename Matrix::local_matrix_type& coarseA, + const typename Teuchos::ScalarTraits::magnitudeType kappa, + const Kokkos::View::val_type*, + Kokkos::LayoutLeft, + typename Matrix::local_matrix_type::device_type>& rowSum, + std::vector& localAggStat, + Teuchos::Array& localVertex2AggID, + LO& numLocalAggregates, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildFurtherAggregates"); + + // Set debug outputs based on environment variable + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + 
out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + using value_type = typename local_matrix_type::value_type; + const value_type KAT_zero = Kokkos::ArithTraits::zero(); + const magnitude_type MT_zero = Teuchos::ScalarTraits::zero(); + const magnitude_type MT_one = Teuchos::ScalarTraits::one(); + const magnitude_type MT_two = MT_one + MT_one; + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + // For finding "ties" where we fall back to the ordering. Making this larger than + // hard zero substantially increases code robustness. + double tie_criterion = params.get("aggregation: pairwise: tie threshold"); + double tie_less = 1.0 - tie_criterion; + double tie_more = 1.0 + tie_criterion; + + typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); + Kokkos::deep_copy(rowSum_h, rowSum); + + // Extracting the diagonal of a KokkosSparse::CrsMatrix + // is not currently provided in kokkos-kernels so here + // is an ugly way to get that done... 
+ const LO numRows = static_cast(coarseA.numRows()); + typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows); + typename local_matrix_type::row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map); + Kokkos::deep_copy(row_map_h, coarseA.graph.row_map); + typename local_matrix_type::index_type::HostMirror entries_h = Kokkos::create_mirror_view(coarseA.graph.entries); + Kokkos::deep_copy(entries_h, coarseA.graph.entries); + typename local_matrix_type::values_type::HostMirror values_h = Kokkos::create_mirror_view(coarseA.values); + Kokkos::deep_copy(values_h, coarseA.values); + for (LO rowIdx = 0; rowIdx < numRows; ++rowIdx) { + for (LO entryIdx = static_cast(row_map_h(rowIdx)); + entryIdx < static_cast(row_map_h(rowIdx + 1)); + ++entryIdx) { + if (rowIdx == static_cast(entries_h(entryIdx))) { + diagA_h(rowIdx) = values_h(entryIdx); } } + } + + for (LO currentIdx = 0; currentIdx < numRows; ++currentIdx) { + if (localAggStat[currentIdx] != READY) { + continue; + } - for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) { - if(localAggStat[currentIdx] != READY) { + LO bestIdx = Teuchos::OrdinalTraits::invalid(); + magnitude_type best_mu = Teuchos::ScalarTraits::zero(); + const magnitude_type aii = Teuchos::ScalarTraits::real(diagA_h(currentIdx)); + const magnitude_type si = Teuchos::ScalarTraits::real(rowSum_h(currentIdx)); + for (auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) { + const LO colIdx = static_cast(entries_h(entryIdx)); + if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) { continue; } - LO bestIdx = Teuchos::OrdinalTraits::invalid(); - magnitude_type best_mu = Teuchos::ScalarTraits::zero(); - const magnitude_type aii = Teuchos::ScalarTraits::real(diagA_h(currentIdx)); - const magnitude_type si = Teuchos::ScalarTraits::real(rowSum_h(currentIdx)); - for(auto entryIdx = row_map_h(currentIdx); 
entryIdx < row_map_h(currentIdx + 1); ++entryIdx) { - const LO colIdx = static_cast(entries_h(entryIdx)); - if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) { - continue; - } - - const magnitude_type aij = Teuchos::ScalarTraits::real(values_h(entryIdx)); - const magnitude_type ajj = Teuchos::ScalarTraits::real(diagA_h(colIdx)); - const magnitude_type sj = - Teuchos::ScalarTraits::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - if(aii - si + ajj - sj >= MT_zero) { - const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj ); - const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj)); - const magnitude_type mu = mu_top / mu_bottom; - - // Modification: Explicitly check the tie criterion here - if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less || - (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) { - best_mu = mu; - bestIdx = colIdx; - *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": " - << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj - << " = " << aii - si + ajj - sj << ", aij = "< - void NotayAggregationFactory:: - BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA, - typename Matrix::local_matrix_type& onrankA) const { - Monitor m(*this, "BuildOnRankLocalMatrix"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } + } // end loop over matrix rows + +} // BuildFurtherAggregates + +template +void NotayAggregationFactory:: + BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA, + 
typename Matrix::local_matrix_type& onrankA) const { + Monitor m(*this, "BuildOnRankLocalMatrix"); + + // Set debug outputs based on environment variable + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - // Extract on rank part of A - // Simply check that the column index is less than the number of local rows - // otherwise remove it. 
- - const int numRows = static_cast(localA.numRows()); - row_pointer_type rowPtr("onrankA row pointer", numRows + 1); - typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); - typename local_graph_type::row_map_type::HostMirror origRowPtr_h - = Kokkos::create_mirror_view(localA.graph.row_map); - typename local_graph_type::entries_type::HostMirror origColind_h - = Kokkos::create_mirror_view(localA.graph.entries); - typename values_type::HostMirror origValues_h - = Kokkos::create_mirror_view(localA.values); - Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map); - Kokkos::deep_copy(origColind_h, localA.graph.entries); - Kokkos::deep_copy(origValues_h, localA.values); - - // Compute the number of nnz entries per row - rowPtr_h(0) = 0; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { - if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;} + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + // Extract on rank part of A + // Simply check that the column index is less than the number of local rows + // otherwise remove it. 
+ + const int numRows = static_cast(localA.numRows()); + row_pointer_type rowPtr("onrankA row pointer", numRows + 1); + typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); + typename local_graph_type::row_map_type::HostMirror origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map); + typename local_graph_type::entries_type::HostMirror origColind_h = Kokkos::create_mirror_view(localA.graph.entries); + typename values_type::HostMirror origValues_h = Kokkos::create_mirror_view(localA.values); + Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map); + Kokkos::deep_copy(origColind_h, localA.graph.entries); + Kokkos::deep_copy(origValues_h, localA.values); + + // Compute the number of nnz entries per row + rowPtr_h(0) = 0; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { + if (origColind_h(entryIdx) < numRows) { + rowPtr_h(rowIdx + 1) += 1; } - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx); } - Kokkos::deep_copy(rowPtr, rowPtr_h); - - const LO nnzOnrankA = rowPtr_h(numRows); - - // Now use nnz per row to allocate matrix views and store column indices and values - col_indices_type colInd("onrankA column indices", rowPtr_h(numRows)); - values_type values("onrankA values", rowPtr_h(numRows)); - typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); - typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); - int entriesInRow; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - entriesInRow = 0; - for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { - if(origColind_h(entryIdx) < numRows) { - colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx); - values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx); - ++entriesInRow; - } + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx); + } 
+ Kokkos::deep_copy(rowPtr, rowPtr_h); + + const LO nnzOnrankA = rowPtr_h(numRows); + + // Now use nnz per row to allocate matrix views and store column indices and values + col_indices_type colInd("onrankA column indices", rowPtr_h(numRows)); + values_type values("onrankA values", rowPtr_h(numRows)); + typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); + typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); + int entriesInRow; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + entriesInRow = 0; + for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { + if (origColind_h(entryIdx) < numRows) { + colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx); + values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx); + ++entriesInRow; } } - Kokkos::deep_copy(colInd, colInd_h); - Kokkos::deep_copy(values, values_h); - - onrankA = local_matrix_type("onrankA", numRows, numRows, - nnzOnrankA, values, rowPtr, colInd); + } + Kokkos::deep_copy(colInd, colInd_h); + Kokkos::deep_copy(values, values_h); + + onrankA = local_matrix_type("onrankA", numRows, numRows, + nnzOnrankA, values, rowPtr, colInd); +} + +template +void NotayAggregationFactory:: + BuildIntermediateProlongator(const LocalOrdinal numRows, + const LocalOrdinal numDirichletNodes, + const LocalOrdinal numLocalAggregates, + const Teuchos::ArrayView& localVertex2AggID, + typename Matrix::local_matrix_type& intermediateP) const { + Monitor m(*this, "BuildIntermediateProlongator"); + + // Set debug outputs based on environment variable + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - template - void NotayAggregationFactory:: - 
BuildIntermediateProlongator(const LocalOrdinal numRows, - const LocalOrdinal numDirichletNodes, - const LocalOrdinal numLocalAggregates, - const Teuchos::ArrayView& localVertex2AggID, - typename Matrix::local_matrix_type& intermediateP) const { - Monitor m(*this, "BuildIntermediateProlongator"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + const int intermediatePnnz = numRows - numDirichletNodes; + row_pointer_type rowPtr("intermediateP row pointer", numRows + 1); + col_indices_type colInd("intermediateP column indices", intermediatePnnz); + values_type values("intermediateP values", intermediatePnnz); + typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); + typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); + + rowPtr_h(0) = 0; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + // Skip Dirichlet nodes + if (localVertex2AggID[rowIdx] == LO_INVALID) { + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename 
local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); - - const int intermediatePnnz = numRows - numDirichletNodes; - row_pointer_type rowPtr("intermediateP row pointer", numRows + 1); - col_indices_type colInd("intermediateP column indices", intermediatePnnz); - values_type values("intermediateP values", intermediatePnnz); - typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); - typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); - - rowPtr_h(0) = 0; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - // Skip Dirichlet nodes - if(localVertex2AggID[rowIdx] == LO_INVALID) { - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx); - } else { - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1; - colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx]; - } + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1; + colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx]; } + } - Kokkos::deep_copy(rowPtr, rowPtr_h); - Kokkos::deep_copy(colInd, colInd_h); - Kokkos::deep_copy(values, Kokkos::ArithTraits::one()); - - intermediateP = local_matrix_type("intermediateP", - numRows, numLocalAggregates, intermediatePnnz, - values, rowPtr, colInd); - } // BuildIntermediateProlongator - - template - void NotayAggregationFactory:: - BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP, - typename Matrix::local_matrix_type& coarseA) const { - Monitor m(*this, "BuildCoarseLocalMatrix"); - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename 
local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - local_matrix_type AP; - localSpGEMM(coarseA, intermediateP, "AP", AP); - - // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle - // I am not sure but doing it for safety in case it stashes data from the previous - // spgemm computation... - - // Compute Ac = Pt * AP - // Two steps needed: - // 1. compute Pt - // 2. perform multiplication - - // Step 1 compute Pt - // Obviously this requires the same amount of storage as P except for the rowPtr - row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"), - intermediateP.numCols() + 1); - col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"), - intermediateP.nnz()); - values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"), - intermediateP.nnz()); - - typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt); - typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries); - Kokkos::deep_copy(entries_h, intermediateP.graph.entries); - Kokkos::deep_copy(rowPtrPt_h, 0); - for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) { - rowPtrPt_h(entries_h(entryIdx) + 1) += 1; - } - for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) { - rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx); - } - Kokkos::deep_copy(rowPtrPt, rowPtrPt_h); - - typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map); - Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map); - typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries); - Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries); - typename 
values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values); - Kokkos::deep_copy(valuesP_h, intermediateP.values); - typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt); - typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt); - const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits::invalid(); - Kokkos::deep_copy(colIndPt_h, invalidColumnIndex); - - col_index_type colIdx = 0; - for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) { - for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) { - colIdx = entries_h(entryIdxP); - for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) { - if(colIndPt_h(entryIdxPt) == invalidColumnIndex) { - colIndPt_h(entryIdxPt) = rowIdx; - valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP); - break; - } - } // Loop over entries in row of Pt - } // Loop over entries in row of P - } // Loop over rows of P - - Kokkos::deep_copy(colIndPt, colIndPt_h); - Kokkos::deep_copy(valuesPt, valuesPt_h); - - - local_matrix_type intermediatePt("intermediatePt", - intermediateP.numCols(), - intermediateP.numRows(), - intermediateP.nnz(), - valuesPt, rowPtrPt, colIndPt); - - // Create views for coarseA matrix - localSpGEMM(intermediatePt, AP, "coarseA", coarseA); - } // BuildCoarseLocalMatrix - - template - void NotayAggregationFactory:: - localSpGEMM(const typename Matrix::local_matrix_type& A, - const typename Matrix::local_matrix_type& B, - const std::string matrixLabel, - typename Matrix::local_matrix_type& C) const { - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_space = 
typename device_type::memory_space; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - // Options - int team_work_size = 16; - std::string myalg("SPGEMM_KK_MEMORY"); - KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg); - KokkosKernels::Experimental::KokkosKernelsHandle kh; - kh.create_spgemm_handle(alg_enum); - kh.set_team_work_size(team_work_size); - - // Create views for AP matrix - row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"), - A.numRows() + 1); - col_indices_type colIndC; - values_type valuesC; - - // Symbolic multiplication - KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), - B.numRows(), B.numCols(), - A.graph.row_map, A.graph.entries, false, - B.graph.row_map, B.graph.entries, false, - rowPtrC); - - // allocate column indices and values of AP - size_t nnzC = kh.get_spgemm_handle()->get_c_nnz(); - if (nnzC) { - colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC); - valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC); - } + Kokkos::deep_copy(rowPtr, rowPtr_h); + Kokkos::deep_copy(colInd, colInd_h); + Kokkos::deep_copy(values, Kokkos::ArithTraits::one()); + + intermediateP = local_matrix_type("intermediateP", + numRows, numLocalAggregates, intermediatePnnz, + values, rowPtr, colInd); +} // BuildIntermediateProlongator + +template +void NotayAggregationFactory:: + BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP, + typename Matrix::local_matrix_type& coarseA) const { + Monitor m(*this, "BuildCoarseLocalMatrix"); + + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using 
array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + + local_matrix_type AP; + localSpGEMM(coarseA, intermediateP, "AP", AP); + + // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle + // I am not sure but doing it for safety in case it stashes data from the previous + // spgemm computation... + + // Compute Ac = Pt * AP + // Two steps needed: + // 1. compute Pt + // 2. perform multiplication + + // Step 1 compute Pt + // Obviously this requires the same amount of storage as P except for the rowPtr + row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"), + intermediateP.numCols() + 1); + col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"), + intermediateP.nnz()); + values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"), + intermediateP.nnz()); + + typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt); + typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries); + Kokkos::deep_copy(entries_h, intermediateP.graph.entries); + Kokkos::deep_copy(rowPtrPt_h, 0); + for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) { + rowPtrPt_h(entries_h(entryIdx) + 1) += 1; + } + for (LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) { + rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx); + } + Kokkos::deep_copy(rowPtrPt, rowPtrPt_h); + + typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map); + Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map); + typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries); + Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries); + typename values_type::HostMirror valuesP_h = 
Kokkos::create_mirror_view(intermediateP.values); + Kokkos::deep_copy(valuesP_h, intermediateP.values); + typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt); + typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt); + const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits::invalid(); + Kokkos::deep_copy(colIndPt_h, invalidColumnIndex); + + col_index_type colIdx = 0; + for (LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) { + for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) { + colIdx = entries_h(entryIdxP); + for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) { + if (colIndPt_h(entryIdxPt) == invalidColumnIndex) { + colIndPt_h(entryIdxPt) = rowIdx; + valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP); + break; + } + } // Loop over entries in row of Pt + } // Loop over entries in row of P + } // Loop over rows of P + + Kokkos::deep_copy(colIndPt, colIndPt_h); + Kokkos::deep_copy(valuesPt, valuesPt_h); + + local_matrix_type intermediatePt("intermediatePt", + intermediateP.numCols(), + intermediateP.numRows(), + intermediateP.nnz(), + valuesPt, rowPtrPt, colIndPt); + + // Create views for coarseA matrix + localSpGEMM(intermediatePt, AP, "coarseA", coarseA); +} // BuildCoarseLocalMatrix + +template +void NotayAggregationFactory:: + localSpGEMM(const typename Matrix::local_matrix_type& A, + const typename Matrix::local_matrix_type& B, + const std::string matrixLabel, + typename Matrix::local_matrix_type& C) const { + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_space = typename device_type::memory_space; + 
using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + + // Options + int team_work_size = 16; + std::string myalg("SPGEMM_KK_MEMORY"); + KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg); + KokkosKernels::Experimental::KokkosKernelsHandle + kh; + kh.create_spgemm_handle(alg_enum); + kh.set_team_work_size(team_work_size); + + // Create views for AP matrix + row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"), + A.numRows() + 1); + col_indices_type colIndC; + values_type valuesC; + + // Symbolic multiplication + KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), + B.numRows(), B.numCols(), + A.graph.row_map, A.graph.entries, false, + B.graph.row_map, B.graph.entries, false, + rowPtrC); + + // allocate column indices and values of AP + size_t nnzC = kh.get_spgemm_handle()->get_c_nnz(); + if (nnzC) { + colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC); + valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC); + } - // Numeric multiplication - KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), - B.numRows(), B.numCols(), - A.graph.row_map, A.graph.entries, A.values, false, - B.graph.row_map, B.graph.entries, B.values, false, - rowPtrC, colIndC, valuesC); - kh.destroy_spgemm_handle(); + // Numeric multiplication + KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), + B.numRows(), B.numCols(), + A.graph.row_map, A.graph.entries, A.values, false, + B.graph.row_map, B.graph.entries, B.values, false, + rowPtrC, colIndC, valuesC); + kh.destroy_spgemm_handle(); - C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC); + C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC); - } // localSpGEMM +} // localSpGEMM -} //namespace MueLu +} // namespace 
MueLu #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp index 2b89f58299a9..2cd379401f54 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp @@ -56,78 +56,74 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationStructuredAlgorithm class. - @brief Algorithm for coarsening a graph with structured aggregation. - - @ingroup Aggregation - - ### Idea ### - Use the logical indexing of the mesh to obtain a very regular aggregation pattern and maintain - lines and planes of the problem as they might be useful to the smoother. - This algorithms is also very easy to parallelize on node due to its very regular and predictible - memory access patern. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: coarsen | describe the coarsening rate to be used in each direction - */ - - template - class AggregationStructuredAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationStructuredAlgorithm class. + @brief Algorithm for coarsening a graph with structured aggregation. + + @ingroup Aggregation + + ### Idea ### + Use the logical indexing of the mesh to obtain a very regular aggregation pattern and maintain + lines and planes of the problem as they might be useful to the smoother. + This algorithms is also very easy to parallelize on node due to its very regular and predictible + memory access patern. 
+ + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: coarsen | describe the coarsening rate to be used in each direction +*/ + +template +class AggregationStructuredAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationStructuredAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationStructuredAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationStructuredAlgorithm() { } + //! Destructor. + virtual ~AggregationStructuredAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, + Aggregates& aggregates, std::vector& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, - Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const; + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, + RCP& myGraph, RCP& coarseCoordinatesFineMap, + RCP& coarseCoordinatesMap) const; + //@} - void BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph, RCP& coarseCoordinatesFineMap, - RCP& coarseCoordinatesMap) const; - //@} + std::string description() const { return "Aggretation: structured algorithm"; } - std::string description() const { return "Aggretation: structured algorithm"; } - - private: - - void ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const; - - void ComputeGraphDataLinear(const GraphBase& graph, RCP& geoData, + private: + void ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, const int numInterpolationPoints, ArrayRCP& nnzOnRow, Array& rowPtr, Array& colIndex) const; - }; + void ComputeGraphDataLinear(const GraphBase& graph, RCP& geoData, + const LO dofsPerNode, const int numInterpolationPoints, + ArrayRCP& nnzOnRow, Array& rowPtr, + Array& colIndex) const; +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp index 325b932fb453..96bbd070c19a 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ - #include #include @@ -65,370 +64,361 @@ namespace MueLu { - template - void AggregationStructuredAlgorithm:: 
- BuildAggregates(const Teuchos::ParameterList& /* params */, const GraphBase& graph, - Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); +template +void AggregationStructuredAlgorithm:: + BuildAggregates(const Teuchos::ParameterList& /* params */, const GraphBase& graph, + Aggregates& aggregates, std::vector& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + RCP geoData = aggregates.GetIndexManager(); + const bool coupled = geoData->isAggregationCoupled(); + const bool singleCoarsePoint = geoData->isSingleCoarsePoint(); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + + *out << "Extract data for ghosted nodes" << std::endl; + geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + LO rem, rate; + Array ghostedIdx(3), coarseIdx(3); + LO ghostedCoarseNodeCoarseLID, aggId; + *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + 
geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); + + for (int dim = 0; dim < 3; ++dim) { + if (singleCoarsePoint && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { + coarseIdx[dim] = 0; + } else { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (ghostedIdx[dim] - geoData->getOffset(dim) < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { + rate = geoData->getCoarseningRate(dim); + } else { + rate = geoData->getCoarseningEndRate(dim); + } + if (rem > (rate / 2)) { + ++coarseIdx[dim]; + } + if (coupled && (geoData->getStartGhostedCoarseNode(dim) * geoData->getCoarseningRate(dim) > geoData->getStartIndex(dim))) { + --coarseIdx[dim]; + } + } } - RCP geoData = aggregates.GetIndexManager(); - const bool coupled = geoData->isAggregationCoupled(); - const bool singleCoarsePoint = geoData->isSingleCoarsePoint(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + ghostedCoarseNodeCoarseLID); + + aggId = ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]; + vertex2AggId[nodeIdx] = aggId; + procWinner[nodeIdx] = ghostedCoarseNodeCoarsePIDs[ghostedCoarseNodeCoarseLID]; + aggStat[nodeIdx] = AGGREGATED; + --numNonAggregatedNodes; + + } // Loop over fine points +} // BuildAggregates() + +template +void AggregationStructuredAlgorithm:: + BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, + RCP& myGraph, RCP& coarseCoordinatesFineMap, + RCP& coarseCoordinatesMap) const { + Monitor m(*this, "BuildGraphP"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + 
out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + const bool coupled = geoData->isAggregationCoupled(); + + // Compute the number of coarse points needed to interpolate quantities to a fine point + int numInterpolationPoints = 0; + if (geoData->getInterpolationOrder() == 0) { + numInterpolationPoints = 1; + } else if (geoData->getInterpolationOrder() == 1) { + // Compute 2^numDimensions using bit logic to avoid round-off errors + numInterpolationPoints = 1 << geoData->getNumDimensions(); + } + *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; + + Array colIndex((geoData->getNumLocalCoarseNodes() + numInterpolationPoints * + (geoData->getNumLocalFineNodes() - geoData->getNumLocalCoarseNodes())) * + dofsPerNode); + Array rowPtr(geoData->getNumLocalFineNodes() * dofsPerNode + 1); + rowPtr[0] = 0; + ArrayRCP nnzOnRow(geoData->getNumLocalFineNodes() * dofsPerNode); + + *out << "Compute prolongatorGraph data" << std::endl; + if (geoData->getInterpolationOrder() == 0) { + ComputeGraphDataConstant(graph, geoData, dofsPerNode, numInterpolationPoints, + nnzOnRow, rowPtr, colIndex); + } else if (geoData->getInterpolationOrder() == 1) { + ComputeGraphDataLinear(graph, geoData, dofsPerNode, numInterpolationPoints, + nnzOnRow, rowPtr, colIndex); + } + + // Compute graph's rowMap, colMap and domainMap + RCP rowMap = MapFactory::Build(graph.GetDomainMap(), dofsPerNode); + RCP colMap, domainMap; + *out << "Compute domain and column maps of the CrsGraph" << std::endl; + if (coupled) { *out << "Extract data for ghosted nodes" << std::endl; + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - LO rem, rate; - 
Array ghostedIdx(3), coarseIdx(3); - LO ghostedCoarseNodeCoarseLID, aggId; - *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - - for(int dim = 0; dim < 3; ++dim) { - if(singleCoarsePoint - && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { - coarseIdx[dim] = 0; - } else { - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(ghostedIdx[dim] - geoData->getOffset(dim) - < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { - rate = geoData->getCoarseningRate(dim); - } else { - rate = geoData->getCoarseningEndRate(dim); - } - if(rem > (rate / 2)) {++coarseIdx[dim];} - if(coupled && (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim))) {--coarseIdx[dim];} - } + // In this case we specify the global number of nodes on the coarse mesh + // as well as the GIDs needed on rank. 
+ colMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + ghostedCoarseNodeCoarseGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + + LO coarseNodeIdx = 0; + Array coarseNodeCoarseGIDs, coarseNodeFineGIDs; + geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); + for (LO nodeIdx = 0; nodeIdx < ghostedCoarseNodeCoarseGIDs.size(); ++nodeIdx) { + if (ghostedCoarseNodeCoarsePIDs[nodeIdx] == colMap->getComm()->getRank()) { + coarseNodeCoarseGIDs[coarseNodeIdx] = ghostedCoarseNodeCoarseGIDs[nodeIdx]; + ++coarseNodeIdx; } - - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - ghostedCoarseNodeCoarseLID); - - aggId = ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]; - vertex2AggId[nodeIdx] = aggId; - procWinner[nodeIdx] = ghostedCoarseNodeCoarsePIDs[ghostedCoarseNodeCoarseLID]; - aggStat[nodeIdx] = AGGREGATED; - --numNonAggregatedNodes; - - } // Loop over fine points - } // BuildAggregates() - - - template - void AggregationStructuredAlgorithm:: - BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph, RCP& coarseCoordinatesFineMap, - RCP& coarseCoordinatesMap) const { - Monitor m(*this, "BuildGraphP"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - - const bool coupled = geoData->isAggregationCoupled(); - - // Compute the number of coarse points needed to interpolate quantities to a fine point - int numInterpolationPoints = 0; - if(geoData->getInterpolationOrder() == 0) { - numInterpolationPoints = 1; - } else if(geoData->getInterpolationOrder() == 1) { - // Compute 2^numDimensions using bit logic to avoid round-off errors - numInterpolationPoints = 1 << 
geoData->getNumDimensions(); - } - *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; - - Array colIndex((geoData->getNumLocalCoarseNodes() + numInterpolationPoints* - (geoData->getNumLocalFineNodes() - geoData->getNumLocalCoarseNodes()))*dofsPerNode); - Array rowPtr(geoData->getNumLocalFineNodes()*dofsPerNode + 1); - rowPtr[0] = 0; - ArrayRCP nnzOnRow(geoData->getNumLocalFineNodes()*dofsPerNode); - - *out << "Compute prolongatorGraph data" << std::endl; - if(geoData->getInterpolationOrder() == 0) { - ComputeGraphDataConstant(graph, geoData, dofsPerNode, numInterpolationPoints, - nnzOnRow, rowPtr, colIndex); - } else if(geoData->getInterpolationOrder() == 1) { - ComputeGraphDataLinear(graph, geoData, dofsPerNode, numInterpolationPoints, - nnzOnRow, rowPtr, colIndex); + domainMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + coarseNodeCoarseGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + coarseNodeCoarseGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + coarseNodeFineGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + } else { + // In this case the map will compute the global number of nodes on the coarse mesh + // and it will assign GIDs to the local coarse nodes. 
+ colMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + geoData->getNumLocalCoarseNodes() * dofsPerNode, + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + domainMap = colMap; + + Array coarseNodeCoarseGIDs(geoData->getNumLocalCoarseNodes()); + Array coarseNodeFineGIDs(geoData->getNumLocalCoarseNodes()); + geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); + coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + geoData->getNumLocalCoarseNodes(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + coarseNodeFineGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + } + + *out << "Call constructor of CrsGraph" << std::endl; + myGraph = CrsGraphFactory::Build(rowMap, + colMap, + nnzOnRow); + + *out << "Fill CrsGraph" << std::endl; + LO rowIdx = 0; + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + for (LO dof = 0; dof < dofsPerNode; ++dof) { + rowIdx = nodeIdx * dofsPerNode + dof; + myGraph->insertLocalIndices(rowIdx, colIndex(rowPtr[rowIdx], nnzOnRow[rowIdx])); } - - // Compute graph's rowMap, colMap and domainMap - RCP rowMap = MapFactory::Build(graph.GetDomainMap(), dofsPerNode); - RCP colMap, domainMap; - *out << "Compute domain and column maps of the CrsGraph" << std::endl; - if(coupled){ - *out << "Extract data for ghosted nodes" << std::endl; - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - // In this case we specify the global number of nodes on the coarse mesh - // as well as the GIDs needed on 
rank. - colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - ghostedCoarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - - LO coarseNodeIdx = 0; - Array coarseNodeCoarseGIDs, coarseNodeFineGIDs; - geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); - for(LO nodeIdx = 0; nodeIdx < ghostedCoarseNodeCoarseGIDs.size(); ++nodeIdx) { - if(ghostedCoarseNodeCoarsePIDs[nodeIdx] == colMap->getComm()->getRank()) { - coarseNodeCoarseGIDs[coarseNodeIdx] = ghostedCoarseNodeCoarseGIDs[nodeIdx]; - ++coarseNodeIdx; + } + + *out << "Call fillComplete on CrsGraph" << std::endl; + myGraph->fillComplete(domainMap, rowMap); + *out << "Prolongator CrsGraph computed" << std::endl; + +} // BuildGraph() + +template +void AggregationStructuredAlgorithm:: + ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, + const LO dofsPerNode, const int /* numInterpolationPoints */, + ArrayRCP& nnzOnRow, Array& rowPtr, + Array& colIndex) const { + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + LO ghostedCoarseNodeCoarseLID, rem, rate; + Array ghostedIdx(3), coarseIdx(3); + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); + + for (int dim = 0; dim < 3; ++dim) { + if (geoData->isSingleCoarsePoint() && 
(geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { + coarseIdx[dim] = 0; + } else { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (ghostedIdx[dim] - geoData->getOffset(dim) < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { + rate = geoData->getCoarseningRate(dim); + } else { + rate = geoData->getCoarseningEndRate(dim); + } + if (rem > (rate / 2)) { + ++coarseIdx[dim]; + } + if ((geoData->getStartGhostedCoarseNode(dim) * geoData->getCoarseningRate(dim) > geoData->getStartIndex(dim)) && geoData->isAggregationCoupled()) { + --coarseIdx[dim]; } - } - domainMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeFineGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - } else { - // In this case the map will compute the global number of nodes on the coarse mesh - // and it will assign GIDs to the local coarse nodes. 
- colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - geoData->getNumLocalCoarseNodes()*dofsPerNode, - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - domainMap = colMap; - - Array coarseNodeCoarseGIDs(geoData->getNumLocalCoarseNodes()); - Array coarseNodeFineGIDs(geoData->getNumLocalCoarseNodes()); - geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); - coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - geoData->getNumLocalCoarseNodes(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - coarseNodeFineGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - } - - *out << "Call constructor of CrsGraph" << std::endl; - myGraph = CrsGraphFactory::Build(rowMap, - colMap, - nnzOnRow); - - *out << "Fill CrsGraph" << std::endl; - LO rowIdx = 0; - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - for(LO dof = 0; dof < dofsPerNode; ++dof) { - rowIdx = nodeIdx*dofsPerNode + dof; - myGraph->insertLocalIndices(rowIdx, colIndex(rowPtr[rowIdx], nnzOnRow[rowIdx]) ); } } - *out << "Call fillComplete on CrsGraph" << std::endl; - myGraph->fillComplete(domainMap, rowMap); - *out << "Prolongator CrsGraph computed" << std::endl; + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + ghostedCoarseNodeCoarseLID); - } // BuildGraph() + for (LO dof = 0; dof < dofsPerNode; ++dof) { + nnzOnRow[nodeIdx * dofsPerNode + dof] = 1; + rowPtr[nodeIdx * dofsPerNode + dof + 1] = rowPtr[nodeIdx * dofsPerNode + dof] + 1; + colIndex[rowPtr[nodeIdx * dofsPerNode + dof]] = + ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID] * dofsPerNode + dof; + } + } // Loop over fine points +} // ComputeGraphDataConstant() - 
template - void AggregationStructuredAlgorithm:: - ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int /* numInterpolationPoints */, +template +void AggregationStructuredAlgorithm:: + ComputeGraphDataLinear(const GraphBase& /* graph */, RCP& geoData, + const LO dofsPerNode, const int numInterpolationPoints, ArrayRCP& nnzOnRow, Array& rowPtr, Array& colIndex) const { - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - LO ghostedCoarseNodeCoarseLID, rem, rate; - Array ghostedIdx(3), coarseIdx(3); - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - - for(int dim = 0; dim < 3; ++dim) { - if(geoData->isSingleCoarsePoint() - && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { - coarseIdx[dim] = 0; - } else { - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(ghostedIdx[dim] - geoData->getOffset(dim) - < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { - rate = geoData->getCoarseningRate(dim); - } else { - rate = geoData->getCoarseningEndRate(dim); - } - if(rem > (rate / 2)) {++coarseIdx[dim];} - if( (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim)) && 
geoData->isAggregationCoupled() ) { - --coarseIdx[dim]; - } + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + const bool coupled = geoData->isAggregationCoupled(); + const int numDimensions = geoData->getNumDimensions(); + Array ghostedIdx(3, 0); + Array coarseIdx(3, 0); + Array ijkRem(3, 0); + const LO coarsePointOffset[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0}, {0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1}}; + + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); + for (int dim = 0; dim < numDimensions; dim++) { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + ijkRem[dim] = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (coupled) { + if (geoData->getStartGhostedCoarseNode(dim) * geoData->getCoarseningRate(dim) > geoData->getStartIndex(dim)) { + --coarseIdx[dim]; } - } - - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - ghostedCoarseNodeCoarseLID); - - for(LO dof = 0; dof < dofsPerNode; ++dof) { - nnzOnRow[nodeIdx*dofsPerNode + dof] = 1; - rowPtr[nodeIdx*dofsPerNode + dof + 1] = rowPtr[nodeIdx*dofsPerNode + dof] + 1; - colIndex[rowPtr[nodeIdx*dofsPerNode + dof]] = - ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]*dofsPerNode + dof; - } - } // Loop over fine points - - } // ComputeGraphDataConstant() - - - template - void AggregationStructuredAlgorithm:: - ComputeGraphDataLinear(const GraphBase& /* graph */, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const { - - RCP out; - if(const char* dbg = 
std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - const bool coupled = geoData->isAggregationCoupled(); - const int numDimensions = geoData->getNumDimensions(); - Array ghostedIdx(3,0); - Array coarseIdx(3,0); - Array ijkRem(3,0); - const LO coarsePointOffset[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0}, - {0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1}}; - - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - for(int dim=0; dim < numDimensions; dim++){ - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - ijkRem[dim] = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(coupled) { - if (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim)) { - --coarseIdx[dim]; - } - } else { - if(ghostedIdx[dim] == geoData->getLocalFineNodesInDir(dim) - 1) { - coarseIdx[dim] = geoData->getLocalCoarseNodesInDir(dim) - 1; - } + } else { + if (ghostedIdx[dim] == geoData->getLocalFineNodesInDir(dim) - 1) { + coarseIdx[dim] = geoData->getLocalCoarseNodesInDir(dim) - 1; } } + } - // Fill Graph - // Check if Fine node lies on Coarse Node - bool allCoarse = true; - Array isCoarse(numDimensions); - for(int dim = 0; dim < numDimensions; ++dim) { - isCoarse[dim] = false; - if(ijkRem[dim] == 0) + // Fill Graph + // Check if Fine node lies on Coarse Node + bool allCoarse = true; + Array isCoarse(numDimensions); + for (int dim = 0; dim < numDimensions; ++dim) { + isCoarse[dim] = false; + if (ijkRem[dim] == 0) + isCoarse[dim] = true; + + if (coupled) { + if (ghostedIdx[dim] - geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim) - 1 && + 
geoData->getMeshEdge(dim * 2 + 1)) + isCoarse[dim] = true; + } else { + if (ghostedIdx[dim] - geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim) - 1) isCoarse[dim] = true; - - if(coupled){ - if( ghostedIdx[dim]-geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim)-1 && - geoData->getMeshEdge(dim*2+1) ) - isCoarse[dim] = true; - } else { - if( ghostedIdx[dim]-geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim)-1) - isCoarse[dim] = true; - } - - if(!isCoarse[dim]) - allCoarse = false; } - LO rowIdx = 0, colIdx = 0; - if(allCoarse) { - for(LO dof = 0; dof < dofsPerNode; ++dof) { - rowIdx = nodeIdx*dofsPerNode + dof; - nnzOnRow[rowIdx] = 1; - rowPtr[rowIdx + 1] = rowPtr[rowIdx] + 1; + if (!isCoarse[dim]) + allCoarse = false; + } - // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], colIdx); - colIndex[rowPtr[rowIdx]] = colIdx*dofsPerNode + dof; - } - } else { - // Harder case, we need the LIDs of all the coarse nodes contributing to the interpolation - for(int dim = 0; dim < numDimensions; ++dim) { - if(coarseIdx[dim] == geoData->getGhostedNodesInDir(dim) - 1) - --coarseIdx[dim]; - } + LO rowIdx = 0, colIdx = 0; + if (allCoarse) { + for (LO dof = 0; dof < dofsPerNode; ++dof) { + rowIdx = nodeIdx * dofsPerNode + dof; + nnzOnRow[rowIdx] = 1; + rowPtr[rowIdx + 1] = rowPtr[rowIdx] + 1; - for(LO dof = 0; dof < dofsPerNode; ++dof) { - // at the current node. 
- rowIdx = nodeIdx*dofsPerNode + dof; - nnzOnRow[rowIdx] = Teuchos::as( numInterpolationPoints ); - rowPtr[rowIdx + 1] = rowPtr[rowIdx] + Teuchos::as(numInterpolationPoints); - // Compute Coarse Node LID - for(LO interpIdx = 0; interpIdx < numInterpolationPoints; ++interpIdx) { - geoData->getCoarseNodeGhostedLID(coarseIdx[0] + coarsePointOffset[interpIdx][0], - coarseIdx[1] + coarsePointOffset[interpIdx][1], - coarseIdx[2] + coarsePointOffset[interpIdx][2], - colIdx); - colIndex[rowPtr[rowIdx] + interpIdx] = colIdx*dofsPerNode + dof; - } // Loop over numInterpolationPoints - } // Loop over dofsPerNode + // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], colIdx); + colIndex[rowPtr[rowIdx]] = colIdx * dofsPerNode + dof; + } + } else { + // Harder case, we need the LIDs of all the coarse nodes contributing to the interpolation + for (int dim = 0; dim < numDimensions; ++dim) { + if (coarseIdx[dim] == geoData->getGhostedNodesInDir(dim) - 1) + --coarseIdx[dim]; } - } // Loop over fine points - } // ComputeGraphDataLinear() -} // end namespace + for (LO dof = 0; dof < dofsPerNode; ++dof) { + // at the current node. 
+ rowIdx = nodeIdx * dofsPerNode + dof; + nnzOnRow[rowIdx] = Teuchos::as(numInterpolationPoints); + rowPtr[rowIdx + 1] = rowPtr[rowIdx] + Teuchos::as(numInterpolationPoints); + // Compute Coarse Node LID + for (LO interpIdx = 0; interpIdx < numInterpolationPoints; ++interpIdx) { + geoData->getCoarseNodeGhostedLID(coarseIdx[0] + coarsePointOffset[interpIdx][0], + coarseIdx[1] + coarsePointOffset[interpIdx][1], + coarseIdx[2] + coarsePointOffset[interpIdx][2], + colIdx); + colIndex[rowPtr[rowIdx] + interpIdx] = colIdx * dofsPerNode + dof; + } // Loop over numInterpolationPoints + } // Loop over dofsPerNode + } + } // Loop over fine points +} // ComputeGraphDataLinear() +} // namespace MueLu #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp index 5d83bf9a5a42..cab18f503aec 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp @@ -55,169 +55,160 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class AggregationStructuredAlgorithm class. - @brief Algorithm for coarsening a graph with structured aggregation. - - @ingroup Aggregation - - ### Idea ### - Use the logical indexing of the mesh to obtain a very regular aggregation pattern and maintain - lines and planes of the problem as they might be useful to the smoother. - This algorithms is also very easy to parallelize on node due to its very regular and predictible - memory access patern. - All the parameters needed are passed to this class by the StructuredAggregationFactory class. - */ - - template - class AggregationStructuredAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! 
+ @class AggregationStructuredAlgorithm class. + @brief Algorithm for coarsening a graph with structured aggregation. + + @ingroup Aggregation + + ### Idea ### + Use the logical indexing of the mesh to obtain a very regular aggregation pattern and maintain + lines and planes of the problem as they might be useful to the smoother. + This algorithms is also very easy to parallelize on node due to its very regular and predictible + memory access patern. + All the parameters needed are passed to this class by the StructuredAggregationFactory class. +*/ + +template +class AggregationStructuredAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + using local_graph_type = typename LWGraph_kokkos::local_graph_type; + using non_const_row_map_type = typename local_graph_type::row_map_type::non_const_type; + using size_type = typename local_graph_type::size_type; + using entries_type = typename local_graph_type::entries_type; + using device_type = typename local_graph_type::device_type; + using execution_space = typename local_graph_type::device_type::execution_space; + using memory_space = typename local_graph_type::device_type::memory_space; - using local_graph_type = typename LWGraph_kokkos::local_graph_type; - using non_const_row_map_type = typename local_graph_type::row_map_type::non_const_type; - using size_type = typename local_graph_type::size_type; - using entries_type = typename local_graph_type::entries_type; - using device_type = typename local_graph_type::device_type; - using execution_space = typename local_graph_type::device_type::execution_space; - using memory_space = typename local_graph_type::device_type::memory_space; + using LOVectorView = decltype(std::declval().getDeviceLocalView(Xpetra::Access::ReadWrite)); + using constIntTupleView = typename Kokkos::View; + using constLOTupleView = typename Kokkos::View; - using 
LOVectorView = decltype(std::declval().getDeviceLocalView(Xpetra::Access::ReadWrite)); - using constIntTupleView = typename Kokkos::View; - using constLOTupleView = typename Kokkos::View; + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor. + AggregationStructuredAlgorithm_kokkos() {} - //! Constructor. - AggregationStructuredAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationStructuredAlgorithm_kokkos() {} - //! Destructor. - virtual ~AggregationStructuredAlgorithm_kokkos() { } + //@} - //@} + //! @name Aggregation methods. + //@{ + /*! @brief Build aggregates object. */ - //! @name Aggregation methods. - //@{ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - /*! @brief Build aggregates object. */ + /*! @brief Build a CrsGraph instead of aggregates. */ - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildGraph(const LWGraph_kokkos& graph, + RCP& geoData, + const LO dofsPerNode, + RCP& myGraph) const; + //@} - /*! @brief Build a CrsGraph instead of aggregates. 
*/ + std::string description() const { return "Aggretation: structured algorithm"; } - void BuildGraph(const LWGraph_kokkos& graph, - RCP& geoData, - const LO dofsPerNode, - RCP& myGraph) const; - //@} + struct fillAggregatesFunctor { + IndexManager_kokkos geoData_; + const int myRank_; + Kokkos::View aggStat_; + LOVectorView vertex2AggID_; + LOVectorView procWinner_; - std::string description() const { return "Aggretation: structured algorithm"; } + fillAggregatesFunctor(RCP geoData, + const int myRank, + Kokkos::View aggStat, + LOVectorView vertex2AggID, + LOVectorView procWinner); - struct fillAggregatesFunctor{ + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx, LO& lNumAggregatedNodes) const; - IndexManager_kokkos geoData_; - const int myRank_; - Kokkos::View aggStat_; - LOVectorView vertex2AggID_; - LOVectorView procWinner_; + }; // struct fillAggregatesFunctor - fillAggregatesFunctor(RCP geoData, - const int myRank, - Kokkos::View aggStat, - LOVectorView vertex2AggID, - LOVectorView procWinner); + struct computeGraphDataConstantFunctor { + IndexManager_kokkos geoData_; + const int numGhostedNodes_; + const LO dofsPerNode_; + constIntTupleView coarseRate_; + constIntTupleView endRate_; + constLOTupleView lFineNodesPerDir_; + non_const_row_map_type rowPtr_; + entries_type colIndex_; - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const; - - }; // struct fillAggregatesFunctor - - struct computeGraphDataConstantFunctor { - - IndexManager_kokkos geoData_; - const int numGhostedNodes_; - const LO dofsPerNode_; - constIntTupleView coarseRate_; - constIntTupleView endRate_; - constLOTupleView lFineNodesPerDir_; - non_const_row_map_type rowPtr_; - entries_type colIndex_; - - - computeGraphDataConstantFunctor(RCP geoData, - const LO numGhostedNodes, const LO dofsPerNode, - constIntTupleView coarseRate, constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr, entries_type colIndex); 
- - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx) const; - - }; // struct computeGraphDataConstantFunctor - - struct computeGraphRowPtrFunctor { - - IndexManager_kokkos geoData_; - const LO dofsPerNode_; - const int numInterpolationPoints_; - const LO numLocalRows_; - constIntTupleView coarseRate_; - constLOTupleView lFineNodesPerDir_; - non_const_row_map_type rowPtr_; - - computeGraphRowPtrFunctor(RCP geoData, - const LO dofsPerNode, - const int numInterpolationPoints, const LO numLocalRows, - constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO rowIdx, GO& update, const bool final) const; - }; // struct computeGraphRowPtrFunctor - - struct computeGraphDataLinearFunctor { - - IndexManager_kokkos geoData_; - const int numDimensions_; - const int numGhostedNodes_; - const LO dofsPerNode_; - const int numInterpolationPoints_; - constIntTupleView coarseRate_; - constIntTupleView endRate_; - constLOTupleView lFineNodesPerDir_; - constLOTupleView ghostedNodesPerDir_; - non_const_row_map_type rowPtr_; - entries_type colIndex_; - - - computeGraphDataLinearFunctor(RCP geoData, - const int numDimensions, + computeGraphDataConstantFunctor(RCP geoData, const LO numGhostedNodes, const LO dofsPerNode, - const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, - constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex); - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx) const; - - }; // struct computeGraphDataLinearFunctor - - }; // class AggregationStructuredAlgorithm_kokkos - -} //namespace MueLu + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx) const; + + }; // struct computeGraphDataConstantFunctor + + struct computeGraphRowPtrFunctor { + IndexManager_kokkos geoData_; + const LO dofsPerNode_; + const int numInterpolationPoints_; + const LO 
numLocalRows_; + constIntTupleView coarseRate_; + constLOTupleView lFineNodesPerDir_; + non_const_row_map_type rowPtr_; + + computeGraphRowPtrFunctor(RCP geoData, + const LO dofsPerNode, + const int numInterpolationPoints, const LO numLocalRows, + constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowIdx, GO& update, const bool final) const; + }; // struct computeGraphRowPtrFunctor + + struct computeGraphDataLinearFunctor { + IndexManager_kokkos geoData_; + const int numDimensions_; + const int numGhostedNodes_; + const LO dofsPerNode_; + const int numInterpolationPoints_; + constIntTupleView coarseRate_; + constIntTupleView endRate_; + constLOTupleView lFineNodesPerDir_; + constLOTupleView ghostedNodesPerDir_; + non_const_row_map_type rowPtr_; + entries_type colIndex_; + + computeGraphDataLinearFunctor(RCP geoData, + const int numDimensions, + const LO numGhostedNodes, const LO dofsPerNode, + const int numInterpolationPoints, + constIntTupleView coarseRate, constIntTupleView endRate, + constLOTupleView lFineNodesPerDir, + constLOTupleView ghostedNodesPerDir, + non_const_row_map_type rowPtr, entries_type colIndex); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx) const; + + }; // struct computeGraphDataLinearFunctor + +}; // class AggregationStructuredAlgorithm_kokkos + +} // namespace MueLu #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp index 09ab332b27f4..d6e7bb665f33 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp +++ 
b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP - #include #include @@ -65,348 +64,363 @@ namespace MueLu { - template - void AggregationStructuredAlgorithm_kokkos:: - BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - RCP geoData = aggregates.GetIndexManagerKokkos(); - const LO numLocalFineNodes= geoData->getNumLocalFineNodes(); - const LO numCoarseNodes = geoData->getNumCoarseNodes(); - LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - LOVectorView procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - - *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; - LO numAggregatedNodes; - fillAggregatesFunctor fillAggregates(geoData, - graph.GetComm()->getRank(), - aggStat, - vertex2AggId, - procWinner); - Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data", - Kokkos::RangePolicy(0, numLocalFineNodes), - fillAggregates, - numAggregatedNodes); - - *out << "numCoarseNodes= " << numCoarseNodes - << ", numAggregatedNodes= " << numAggregatedNodes << std::endl; - numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes; - - } // BuildAggregates() - - - template - void AggregationStructuredAlgorithm_kokkos:: - BuildGraph(const LWGraph_kokkos& graph, RCP& geoData, const LO 
dofsPerNode, - RCP& myGraph) const { - Monitor m(*this, "BuildGraphP"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - // Compute the number of coarse points needed to interpolate quantities to a fine point - int numInterpolationPoints = 0; - if(geoData->getInterpolationOrder() == 0) { - numInterpolationPoints = 1; - } else if(geoData->getInterpolationOrder() == 1) { - // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow() - numInterpolationPoints = 1 << geoData->getNumDimensions(); - } - *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; - - const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); - const LO numCoarseNodes = geoData->getNumCoarseNodes(); - const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints - *(numLocalFineNodes - numCoarseNodes)); - - non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode*(numLocalFineNodes + 1)); - entries_type colIndex("Prolongator graph, colIndices", numNnzEntries); - - *out << "Compute prolongatorGraph data" << std::endl; - if(geoData->getInterpolationOrder() == 0) { - computeGraphDataConstantFunctor computeGraphData(geoData, - numCoarseNodes, - dofsPerNode, - geoData->getCoarseningRates(), - geoData->getCoarseningEndRates(), - geoData->getLocalFineNodesPerDir(), - rowPtr, - colIndex); - Kokkos::parallel_for("Structured Aggregation: compute loca graph data", - Kokkos::RangePolicy(0, numLocalFineNodes), - computeGraphData); - } else if(geoData->getInterpolationOrder() == 1) { - // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr - // using a parallel scan, it might be possible to do something faster than that - // by including this calculation in 
computeGraphDataLinearFunctor but at the moment - // all the ideas I have include a bunch of if statements which I would like to avoid. - computeGraphRowPtrFunctor computeGraphRowPtr(geoData, - dofsPerNode, - numInterpolationPoints, - numLocalFineNodes, - geoData->getCoarseningRates(), - geoData->getLocalFineNodesPerDir(), - rowPtr); - Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph", - Kokkos::RangePolicy(0, numLocalFineNodes + 1), - computeGraphRowPtr); - - computeGraphDataLinearFunctor computeGraphData(geoData, - geoData->getNumDimensions(), +template +void AggregationStructuredAlgorithm_kokkos:: + BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + RCP geoData = aggregates.GetIndexManagerKokkos(); + const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); + const LO numCoarseNodes = geoData->getNumCoarseNodes(); + LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + LOVectorView procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + + *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; + LO numAggregatedNodes; + fillAggregatesFunctor fillAggregates(geoData, + graph.GetComm()->getRank(), + aggStat, + vertex2AggId, + procWinner); + Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data", + Kokkos::RangePolicy(0, numLocalFineNodes), + fillAggregates, + numAggregatedNodes); + + *out << "numCoarseNodes= " << numCoarseNodes + << ", 
numAggregatedNodes= " << numAggregatedNodes << std::endl; + numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes; + +} // BuildAggregates() + +template +void AggregationStructuredAlgorithm_kokkos:: + BuildGraph(const LWGraph_kokkos& graph, RCP& geoData, const LO dofsPerNode, + RCP& myGraph) const { + Monitor m(*this, "BuildGraphP"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + // Compute the number of coarse points needed to interpolate quantities to a fine point + int numInterpolationPoints = 0; + if (geoData->getInterpolationOrder() == 0) { + numInterpolationPoints = 1; + } else if (geoData->getInterpolationOrder() == 1) { + // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow() + numInterpolationPoints = 1 << geoData->getNumDimensions(); + } + *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; + + const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); + const LO numCoarseNodes = geoData->getNumCoarseNodes(); + const LO numNnzEntries = dofsPerNode * (numCoarseNodes + numInterpolationPoints * (numLocalFineNodes - numCoarseNodes)); + + non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode * (numLocalFineNodes + 1)); + entries_type colIndex("Prolongator graph, colIndices", numNnzEntries); + + *out << "Compute prolongatorGraph data" << std::endl; + if (geoData->getInterpolationOrder() == 0) { + computeGraphDataConstantFunctor computeGraphData(geoData, numCoarseNodes, dofsPerNode, - numInterpolationPoints, geoData->getCoarseningRates(), geoData->getCoarseningEndRates(), geoData->getLocalFineNodesPerDir(), - geoData->getCoarseNodesPerDir(), rowPtr, colIndex); - Kokkos::parallel_for("Structured Aggregation: compute loca graph 
data", - Kokkos::RangePolicy(0, numLocalFineNodes), - computeGraphData); + Kokkos::parallel_for("Structured Aggregation: compute loca graph data", + Kokkos::RangePolicy(0, numLocalFineNodes), + computeGraphData); + } else if (geoData->getInterpolationOrder() == 1) { + // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr + // using a parallel scan, it might be possible to do something faster than that + // by including this calculation in computeGraphDataLinearFunctor but at the moment + // all the ideas I have include a bunch of if statements which I would like to avoid. + computeGraphRowPtrFunctor computeGraphRowPtr(geoData, + dofsPerNode, + numInterpolationPoints, + numLocalFineNodes, + geoData->getCoarseningRates(), + geoData->getLocalFineNodesPerDir(), + rowPtr); + Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph", + Kokkos::RangePolicy(0, numLocalFineNodes + 1), + computeGraphRowPtr); + + computeGraphDataLinearFunctor computeGraphData(geoData, + geoData->getNumDimensions(), + numCoarseNodes, + dofsPerNode, + numInterpolationPoints, + geoData->getCoarseningRates(), + geoData->getCoarseningEndRates(), + geoData->getLocalFineNodesPerDir(), + geoData->getCoarseNodesPerDir(), + rowPtr, + colIndex); + Kokkos::parallel_for("Structured Aggregation: compute loca graph data", + Kokkos::RangePolicy(0, numLocalFineNodes), + computeGraphData); + } + + local_graph_type myLocalGraph(colIndex, rowPtr); + + // Compute graph's colMap and domainMap + RCP colMap, domainMap; + *out << "Compute domain and column maps of the CrsGraph" << std::endl; + colMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + numCoarseNodes, + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + domainMap = colMap; + + myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap, + colMap, graph.GetDomainMap()); + +} // BuildGraph() + +template 
+AggregationStructuredAlgorithm_kokkos:: + fillAggregatesFunctor::fillAggregatesFunctor(RCP geoData, + const int myRank, + Kokkos::View aggStat, + LOVectorView vertex2AggID, + LOVectorView procWinner) + : geoData_(*geoData) + , myRank_(myRank) + , aggStat_(aggStat) + , vertex2AggID_(vertex2AggID) + , procWinner_(procWinner) {} + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + fillAggregatesFunctor::operator()(const LO nodeIdx, LO& lNumAggregatedNodes) const { + // Compute coarse ID associated with fine LID + LO rem, rate; + LO coarseNodeCoarseLID; + LO nodeFineTuple[3], coarseIdx[3]; + auto coarseRate = geoData_.getCoarseningRates(); + auto endRate = geoData_.getCoarseningEndRates(); + auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir(); + // Compute coarse ID associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + for (int dim = 0; dim < 3; ++dim) { + coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim); + rem = nodeFineTuple[dim] % coarseRate(dim); + rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? 
coarseRate(dim) : endRate(dim); + if (rem > (rate / 2)) { + ++coarseIdx[dim]; } - - local_graph_type myLocalGraph(colIndex, rowPtr); - - // Compute graph's colMap and domainMap - RCP colMap, domainMap; - *out << "Compute domain and column maps of the CrsGraph" << std::endl; - colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - numCoarseNodes, - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - domainMap = colMap; - - myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap, - colMap, graph.GetDomainMap()); - - } // BuildGraph() - - - template - AggregationStructuredAlgorithm_kokkos:: - fillAggregatesFunctor::fillAggregatesFunctor(RCP geoData, - const int myRank, - Kokkos::View aggStat, - LOVectorView vertex2AggID, - LOVectorView procWinner) : - geoData_(*geoData), myRank_(myRank), aggStat_(aggStat), - vertex2AggID_(vertex2AggID), procWinner_(procWinner) {} - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - fillAggregatesFunctor::operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const { - // Compute coarse ID associated with fine LID - LO rem, rate; - LO coarseNodeCoarseLID; - LO nodeFineTuple[3], coarseIdx[3]; - auto coarseRate = geoData_.getCoarseningRates(); - auto endRate = geoData_.getCoarseningEndRates(); - auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir(); - // Compute coarse ID associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - for(int dim = 0; dim < 3; ++dim) { - coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim); - rem = nodeFineTuple[dim] % coarseRate(dim); - rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? 
coarseRate(dim) : endRate(dim); - if(rem > (rate / 2)) {++coarseIdx[dim];} + } + + geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + coarseNodeCoarseLID); + + vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID; + procWinner_(nodeIdx, 0) = myRank_; + aggStat_(nodeIdx) = AGGREGATED; + ++lNumAggregatedNodes; + +} // fillAggregatesFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataConstantFunctor:: + computeGraphDataConstantFunctor(RCP geoData, + const LO NumGhostedNodes, + const LO dofsPerNode, + constIntTupleView coarseRate, + constIntTupleView endRate, + constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr, + entries_type colIndex) + : geoData_(*geoData) + , numGhostedNodes_(NumGhostedNodes) + , dofsPerNode_(dofsPerNode) + , coarseRate_(coarseRate) + , endRate_(endRate) + , lFineNodesPerDir_(lFineNodesPerDir) + , rowPtr_(rowPtr) + , colIndex_(colIndex) { +} // computeGraphDataConstantFunctor() + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + computeGraphDataConstantFunctor::operator()(const LO nodeIdx) const { + LO nodeFineTuple[3] = {0, 0, 0}; + LO nodeCoarseTuple[3] = {0, 0, 0}; + + // Compute ghosted tuple associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + // Compute coarse tuple associated with fine point + // then overwrite it with tuple associated with aggregate + LO rem, rate, coarseNodeCoarseLID; + for (int dim = 0; dim < 3; ++dim) { + nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); + rem = nodeFineTuple[dim] % coarseRate_(dim); + if (nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim))) { + rate = coarseRate_(dim); + } else { + rate = endRate_(dim); } - - geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - coarseNodeCoarseLID); - - vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID; - procWinner_(nodeIdx, 0) = myRank_; - aggStat_(nodeIdx) = AGGREGATED; - 
++lNumAggregatedNodes; - - } // fillAggregatesFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphDataConstantFunctor:: - computeGraphDataConstantFunctor(RCP geoData, - const LO NumGhostedNodes, - const LO dofsPerNode, - constIntTupleView coarseRate, - constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr, - entries_type colIndex) : geoData_(*geoData), - numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode), - coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), - rowPtr_(rowPtr), colIndex_(colIndex) { - - } // computeGraphDataConstantFunctor() - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphDataConstantFunctor::operator() (const LO nodeIdx) const { - LO nodeFineTuple[3] = {0, 0, 0}; - LO nodeCoarseTuple[3] = {0, 0, 0}; - - // Compute ghosted tuple associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - // Compute coarse tuple associated with fine point - // then overwrite it with tuple associated with aggregate - LO rem, rate, coarseNodeCoarseLID; - for(int dim = 0; dim < 3; ++dim) { - nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); - rem = nodeFineTuple[dim] % coarseRate_(dim); - if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) { - rate = coarseRate_(dim); - } else { - rate = endRate_(dim); - } - if(rem > (rate / 2)) {++nodeCoarseTuple[dim];} + if (rem > (rate / 2)) { + ++nodeCoarseTuple[dim]; } + } + + // get LID associted with aggregate + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], + coarseNodeCoarseLID); + + // store data into CrsGraph taking care of multiple dofs case + for (LO dof = 0; dof < dofsPerNode_; ++dof) { + rowPtr_(nodeIdx * dofsPerNode_ + dof + 1) = nodeIdx * dofsPerNode_ + dof + 1; + colIndex_(nodeIdx * dofsPerNode_ + dof) = coarseNodeCoarseLID * dofsPerNode_ + dof; + } + +} 
// computeGraphDataConstantFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphRowPtrFunctor::computeGraphRowPtrFunctor(RCP geoData, + const LO dofsPerNode, + const int numInterpolationPoints, + const LO numLocalRows, + constIntTupleView coarseRate, + constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr) + : geoData_(*geoData) + , dofsPerNode_(dofsPerNode) + , numInterpolationPoints_(numInterpolationPoints) + , numLocalRows_(numLocalRows) + , coarseRate_(coarseRate) + , lFineNodesPerDir_(lFineNodesPerDir) + , rowPtr_(rowPtr) {} + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + computeGraphRowPtrFunctor::operator()(const LO rowIdx, GO& update, const bool final) const { + if (final) { + // Kokkos uses a multipass algorithm to implement scan. + // Only update the array on the final pass. Updating the + // array before changing 'update' means that we do an + // exclusive scan. Update the array after for an inclusive + // scan. 
+ rowPtr_(rowIdx) = update; + } + if (rowIdx < numLocalRows_) { + LO nodeIdx = rowIdx / dofsPerNode_; + bool allCoarse = true; + LO nodeFineTuple[3] = {0, 0, 0}; + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + for (int dim = 0; dim < 3; ++dim) { + const LO rem = nodeFineTuple[dim] % coarseRate_(dim); - // get LID associted with aggregate - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], - coarseNodeCoarseLID); - - // store data into CrsGraph taking care of multiple dofs case - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1; - colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof; + // Check if Fine node lies on Coarse Node + allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1))); } - - } // computeGraphDataConstantFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphRowPtrFunctor::computeGraphRowPtrFunctor(RCP geoData, - const LO dofsPerNode, - const int numInterpolationPoints, - const LO numLocalRows, - constIntTupleView coarseRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr) : - geoData_(*geoData), dofsPerNode_(dofsPerNode), - numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows), - coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {} - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphRowPtrFunctor::operator() (const LO rowIdx, GO& update, const bool final) const { - if (final) { - // Kokkos uses a multipass algorithm to implement scan. - // Only update the array on the final pass. Updating the - // array before changing 'update' means that we do an - // exclusive scan. Update the array after for an inclusive - // scan. - rowPtr_(rowIdx) = update; + update += (allCoarse ? 
1 : numInterpolationPoints_); + } +} // computeGraphRowPtrFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataLinearFunctor::computeGraphDataLinearFunctor(RCP geoData, + const int numDimensions, + const LO numGhostedNodes, + const LO dofsPerNode, + const int numInterpolationPoints, + constIntTupleView coarseRate, + constIntTupleView endRate, + constLOTupleView lFineNodesPerDir, + constLOTupleView ghostedNodesPerDir, + non_const_row_map_type rowPtr, + entries_type colIndex) + : geoData_(*geoData) + , numDimensions_(numDimensions) + , numGhostedNodes_(numGhostedNodes) + , dofsPerNode_(dofsPerNode) + , numInterpolationPoints_(numInterpolationPoints) + , coarseRate_(coarseRate) + , endRate_(endRate) + , lFineNodesPerDir_(lFineNodesPerDir) + , ghostedNodesPerDir_(ghostedNodesPerDir) + , rowPtr_(rowPtr) + , colIndex_(colIndex) { +} // computeGraphDataLinearFunctor() + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + computeGraphDataLinearFunctor::operator()(const LO nodeIdx) const { + LO nodeFineTuple[3] = {0, 0, 0}; + LO nodeCoarseTuple[3] = {0, 0, 0}; + + // Compute coarse ID associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + LO coarseNodeCoarseLID; + bool allCoarse = false; + for (int dim = 0; dim < 3; ++dim) { + nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); + } + if (rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) { + allCoarse = true; + } + + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], + coarseNodeCoarseLID); + + if (allCoarse) { + // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. 
+ for (LO dof = 0; dof < dofsPerNode_; ++dof) { + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof)) = coarseNodeCoarseLID * dofsPerNode_ + dof; } - if (rowIdx < numLocalRows_) { - LO nodeIdx = rowIdx / dofsPerNode_; - bool allCoarse = true; - LO nodeFineTuple[3] = {0, 0, 0}; - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - for(int dim = 0; dim < 3; ++dim) { - const LO rem = nodeFineTuple[dim] % coarseRate_(dim); - - // Check if Fine node lies on Coarse Node - allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1))); + } else { + for (int dim = 0; dim < numDimensions_; ++dim) { + if (nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { + --nodeCoarseTuple[dim]; } - update += (allCoarse ? 1 : numInterpolationPoints_); } - } // computeGraphRowPtrFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphDataLinearFunctor::computeGraphDataLinearFunctor(RCP geoData, - const int numDimensions, - const LO numGhostedNodes, - const LO dofsPerNode, - const int numInterpolationPoints, - constIntTupleView coarseRate, - constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - constLOTupleView ghostedNodesPerDir, - non_const_row_map_type rowPtr, - entries_type colIndex) : - geoData_(*geoData), numDimensions_(numDimensions), - numGhostedNodes_(numGhostedNodes), - dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints), - coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), - ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) { - - } // computeGraphDataLinearFunctor() - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphDataLinearFunctor::operator() (const LO nodeIdx) const { - LO nodeFineTuple[3] = {0, 0, 0}; - LO nodeCoarseTuple[3] = {0, 0, 0}; - - // Compute coarse ID associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - LO 
coarseNodeCoarseLID; - bool allCoarse = false; - for(int dim = 0; dim < 3; ++dim) { - nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); - } - if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse = true;} - - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], - coarseNodeCoarseLID); - - if(allCoarse) { - // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof; - } - } else { - - for(int dim = 0; dim < numDimensions_; ++dim) { - if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; } - } - // Compute Coarse Node LID - // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow - // find out the number of dimensions before calling the opertor() of the functor. - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0)); - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1)); - if(numDimensions_ > 1) { - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2)); - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3)); - if(numDimensions_ > 2) { - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5)); - 
geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7)); - } + // Compute Coarse Node LID + // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow + // find out the number of dimensions before calling the opertor() of the functor. + for (LO dof = 0; dof < dofsPerNode_; ++dof) { + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 0)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 1)); + if (numDimensions_ > 1) { + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 2)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 3)); + if (numDimensions_ > 2) { + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 4)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 5)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 6)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 7)); } } } - } // computeGraphDataLinearFunctor::operator() - -} // end namespace + } +} // 
computeGraphDataLinearFunctor::operator() +} // namespace MueLu #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp index 28d26813f5d6..04d360374190 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp @@ -75,178 +75,174 @@ namespace MueLu { and local lexicographic mesh orderings are supported. */ - template - class IndexManager : public BaseClass { +template +class IndexManager : public BaseClass { #undef MUELU_INDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - private: + private: + protected: + const RCP > comm_; ///< Communicator used by uncoupled aggregation + const bool coupled_; ///< Flag for coupled vs uncoupled aggregation mode, if true aggregation is coupled. + const bool singleCoarsePoint_; ///< Flag telling us if can reduce dimensions to a single layer. + const int numDimensions; ///< Number of spacial dimensions in the problem + const int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. - protected: + Array coarseRate; ///< coarsening rate in each direction + Array endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. - const RCP > comm_; ///< Communicator used by uncoupled aggregation - const bool coupled_; ///< Flag for coupled vs uncoupled aggregation mode, if true aggregation is coupled. - const bool singleCoarsePoint_; ///< Flag telling us if can reduce dimensions to a single layer. - const int numDimensions; ///< Number of spacial dimensions in the problem - const int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. + GO gNumFineNodes; ///< global number of nodes. 
+ GO gNumFineNodes10; ///< global number of nodes per 0-1 slice. + const Array gFineNodesPerDir; ///< global number of nodes per direction. - Array coarseRate; ///< coarsening rate in each direction - Array endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. + LO lNumFineNodes; ///< local number of nodes. + LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. + const Array lFineNodesPerDir; ///< local number of nodes per direction. - GO gNumFineNodes; ///< global number of nodes. - GO gNumFineNodes10; ///< global number of nodes per 0-1 slice. - const Array gFineNodesPerDir; ///< global number of nodes per direction. + GO gNumCoarseNodes; ///< global number of nodes remaining after coarsening. + GO gNumCoarseNodes10; ///< global number of nodes per 0-1 slice remaining after coarsening. + Array gCoarseNodesPerDir; ///< global number of nodes per direction remaining after coarsening. - LO lNumFineNodes; ///< local number of nodes. - LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. - const Array lFineNodesPerDir; ///< local number of nodes per direction. + LO lNumCoarseNodes; ///< local number of nodes remaining after coarsening. + LO lNumCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. + Array lCoarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. - GO gNumCoarseNodes; ///< global number of nodes remaining after coarsening. - GO gNumCoarseNodes10; ///< global number of nodes per 0-1 slice remaining after coarsening. - Array gCoarseNodesPerDir; ///< global number of nodes per direction remaining after coarsening. + LO numGhostNodes; ///< local number of ghost nodes + LO numGhostedNodes; ///< local number of ghosted nodes (i.e. ghost + coarse nodes). + LO numGhostedNodes10; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per 0-1 slice. + Array ghostedNodesPerDir; ///< local number of ghosted nodes (i.e. 
ghost + coarse nodes) per direction - LO lNumCoarseNodes; ///< local number of nodes remaining after coarsening. - LO lNumCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. - Array lCoarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. + GO minGlobalIndex; ///< lowest GID of any node in the local process + Array offsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) ghostedNodeIndex in that direction. + Array coarseNodeOffsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) coarseNodeIndex in that direction. + Array startIndices; ///< lowest global tuple (i,j,k) of a node on the local process + Array startGhostedCoarseNode; ///< lowest coarse global tuple (i,j,k) of a node remaing on the local process after coarsening. - LO numGhostNodes; ///< local number of ghost nodes - LO numGhostedNodes; ///< local number of ghosted nodes (i.e. ghost + coarse nodes). - LO numGhostedNodes10; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per 0-1 slice. - Array ghostedNodesPerDir; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per direction + bool meshEdge[6] = {false}; ///< flags indicating if we run into the edge of the mesh in ilo, ihi, jlo, jhi, klo or khi. + bool ghostInterface[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. + bool ghostedDir[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. - GO minGlobalIndex; ///< lowest GID of any node in the local process - Array offsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) ghostedNodeIndex in that direction. - Array coarseNodeOffsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) coarseNodeIndex in that direction. 
- Array startIndices; ///< lowest global tuple (i,j,k) of a node on the local process - Array startGhostedCoarseNode; ///< lowest coarse global tuple (i,j,k) of a node remaing on the local process after coarsening. + public: + IndexManager() = default; - bool meshEdge[6] = {false}; ///< flags indicating if we run into the edge of the mesh in ilo, ihi, jlo, jhi, klo or khi. - bool ghostInterface[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. - bool ghostedDir[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. + IndexManager(const RCP > comm, const bool coupled, + const bool singleCoarsePoint, const int NumDimensions, + const int interpolationOrder, const Array GFineNodesPerDir, + const Array LFineNodesPerDir); - public: + virtual ~IndexManager() {} - IndexManager() = default; + //! Sets basic parameters used to compute indices on the mesh. + //! This method requires you to have set this->coarseRate and this->startIndices. + void computeMeshParameters(); - IndexManager(const RCP > comm, const bool coupled, - const bool singleCoarsePoint, const int NumDimensions, - const int interpolationOrder, const Array GFineNodesPerDir, - const Array LFineNodesPerDir); + virtual void computeGlobalCoarseParameters() = 0; - virtual ~IndexManager() {} + virtual void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const = 0; - //! Sets basic parameters used to compute indices on the mesh. - //! This method requires you to have set this->coarseRate and this->startIndices. 
- void computeMeshParameters(); + virtual void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const = 0; - virtual void computeGlobalCoarseParameters() = 0; + bool isAggregationCoupled() const { return coupled_; } - virtual void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const = 0; + bool isSingleCoarsePoint() const { return singleCoarsePoint_; } - virtual void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const = 0; + int getNumDimensions() const { return numDimensions; } - bool isAggregationCoupled() const {return coupled_;} + int getInterpolationOrder() const { return interpolationOrder_; } - bool isSingleCoarsePoint() const {return singleCoarsePoint_;} + GO getNumGlobalFineNodes() const { return gNumFineNodes; } - int getNumDimensions() const {return numDimensions;} + GO getNumGlobalCoarseNodes() const { return gNumCoarseNodes; } - int getInterpolationOrder() const {return interpolationOrder_;} + LO getNumLocalFineNodes() const { return lNumFineNodes; } - GO getNumGlobalFineNodes() const {return gNumFineNodes;} + LO getNumLocalCoarseNodes() const { return lNumCoarseNodes; } - GO getNumGlobalCoarseNodes() const {return gNumCoarseNodes;} + LO getNumLocalGhostedNodes() const { return numGhostedNodes; } - LO getNumLocalFineNodes() const {return lNumFineNodes;} + Array getCoarseningRates() const { return coarseRate; } - LO getNumLocalCoarseNodes() const {return lNumCoarseNodes;} + int getCoarseningRate(const int dim) const { return coarseRate[dim]; } - LO getNumLocalGhostedNodes() const {return numGhostedNodes;} + Array getCoarseningEndRates() const { return endRate; } - Array getCoarseningRates() const {return coarseRate;} + int getCoarseningEndRate(const int dim) const { return endRate[dim]; } - int getCoarseningRate(const int dim) const {return 
coarseRate[dim];} + bool getMeshEdge(const int dir) const { return meshEdge[dir]; } - Array getCoarseningEndRates() const {return endRate;} + bool getGhostInterface(const int dir) const { return ghostInterface[dir]; } - int getCoarseningEndRate(const int dim) const {return endRate[dim];} + Array getOffsets() const { return offsets; } - bool getMeshEdge(const int dir) const {return meshEdge[dir];} + LO getOffset(int const dim) const { return offsets[dim]; } - bool getGhostInterface(const int dir) const {return ghostInterface[dir];} + Array getCoarseNodeOffsets() const { return coarseNodeOffsets; } - Array getOffsets() const {return offsets;} + LO getCoarseNodeOffset(int const dim) const { return coarseNodeOffsets[dim]; } - LO getOffset(int const dim) const {return offsets[dim];} + Array getStartIndices() const { return startIndices; } - Array getCoarseNodeOffsets() const {return coarseNodeOffsets;} + GO getStartIndex(int const dim) const { return startIndices[dim]; } - LO getCoarseNodeOffset(int const dim) const {return coarseNodeOffsets[dim];} + Array getStartGhostedCoarseNodes() const { return startGhostedCoarseNode; } - Array getStartIndices() const {return startIndices;} + GO getStartGhostedCoarseNode(int const dim) const { return startGhostedCoarseNode[dim]; } - GO getStartIndex(int const dim) const {return startIndices[dim];} + Array getLocalFineNodesPerDir() const { return lFineNodesPerDir; } - Array getStartGhostedCoarseNodes() const {return startGhostedCoarseNode;} + LO getLocalFineNodesInDir(const int dim) const { return lFineNodesPerDir[dim]; } - GO getStartGhostedCoarseNode(int const dim) const {return startGhostedCoarseNode[dim];} + Array getGlobalFineNodesPerDir() const { return gFineNodesPerDir; } - Array getLocalFineNodesPerDir() const {return lFineNodesPerDir;} + GO getGlobalFineNodesInDir(const int dim) const { return gFineNodesPerDir[dim]; } - LO getLocalFineNodesInDir(const int dim) const {return lFineNodesPerDir[dim];} + Array 
getLocalCoarseNodesPerDir() const { return lCoarseNodesPerDir; } - Array getGlobalFineNodesPerDir() const {return gFineNodesPerDir;} + LO getLocalCoarseNodesInDir(const int dim) const { return lCoarseNodesPerDir[dim]; } - GO getGlobalFineNodesInDir(const int dim) const {return gFineNodesPerDir[dim];} + Array getGlobalCoarseNodesPerDir() const { return gCoarseNodesPerDir; } - Array getLocalCoarseNodesPerDir() const {return lCoarseNodesPerDir;} + GO getGlobalCoarseNodesInDir(const int dim) const { return gCoarseNodesPerDir[dim]; } - LO getLocalCoarseNodesInDir(const int dim) const {return lCoarseNodesPerDir[dim];} + Array getGhostedNodesPerDir() const { return ghostedNodesPerDir; } - Array getGlobalCoarseNodesPerDir() const {return gCoarseNodesPerDir;} + LO getGhostedNodesInDir(const int dim) const { return ghostedNodesPerDir[dim]; } - GO getGlobalCoarseNodesInDir(const int dim) const {return gCoarseNodesPerDir[dim];} + virtual std::vector > getCoarseMeshData() const = 0; - Array getGhostedNodesPerDir() const {return ghostedNodesPerDir;} + virtual void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; - LO getGhostedNodesInDir(const int dim) const {return ghostedNodesPerDir[dim];} + virtual void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; - virtual std::vector > getCoarseMeshData() const = 0; + virtual void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; - virtual void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; + virtual void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; - virtual void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; - virtual void 
getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; + virtual void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; - virtual void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; - virtual void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; + virtual void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; + virtual void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; +}; - virtual void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - - virtual void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - - virtual void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_INDEXMANAGER_SHORT -#endif // MUELU_INDEXMANAGER_DECL_HPP +#endif // MUELU_INDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp index 894e26393906..50df1f9f9dd5 100644 --- 
a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp @@ -57,220 +57,233 @@ namespace MueLu { - template - IndexManager:: - IndexManager(const RCP > comm, - const bool coupled, - const bool singleCoarsePoint, - const int NumDimensions, - const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir) : - comm_(comm), coupled_(coupled), singleCoarsePoint_(singleCoarsePoint), - numDimensions(NumDimensions), interpolationOrder_(interpolationOrder), - gFineNodesPerDir(GFineNodesPerDir), lFineNodesPerDir(LFineNodesPerDir) { +template +IndexManager:: + IndexManager(const RCP > comm, + const bool coupled, + const bool singleCoarsePoint, + const int NumDimensions, + const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir) + : comm_(comm) + , coupled_(coupled) + , singleCoarsePoint_(singleCoarsePoint) + , numDimensions(NumDimensions) + , interpolationOrder_(interpolationOrder) + , gFineNodesPerDir(GFineNodesPerDir) + , lFineNodesPerDir(LFineNodesPerDir) { + coarseRate.resize(3); + endRate.resize(3); + gCoarseNodesPerDir.resize(3); + lCoarseNodesPerDir.resize(3); + ghostedNodesPerDir.resize(3); - coarseRate.resize(3); - endRate.resize(3); - gCoarseNodesPerDir.resize(3); - lCoarseNodesPerDir.resize(3); - ghostedNodesPerDir.resize(3); + offsets.resize(3); + coarseNodeOffsets.resize(3); + startIndices.resize(6); + startGhostedCoarseNode.resize(3); - offsets.resize(3); - coarseNodeOffsets.resize(3); - startIndices.resize(6); - startGhostedCoarseNode.resize(3); +} // Constructor - } // Constructor +template +void IndexManager:: + computeMeshParameters() { + RCP out; + if (const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new 
Teuchos::oblackholestream())); + } - template - void IndexManager:: - computeMeshParameters() { + if (coupled_) { + gNumFineNodes10 = gFineNodesPerDir[1] * gFineNodesPerDir[0]; + gNumFineNodes = gFineNodesPerDir[2] * gNumFineNodes10; + } else { + gNumFineNodes10 = Teuchos::OrdinalTraits::invalid(); + gNumFineNodes = Teuchos::OrdinalTraits::invalid(); + } + lNumFineNodes10 = lFineNodesPerDir[1] * lFineNodesPerDir[0]; + lNumFineNodes = lFineNodesPerDir[2] * lNumFineNodes10; + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + if (coupled_) { + if (startIndices[dim] == 0) { + meshEdge[2 * dim] = true; + } + if (startIndices[dim + 3] + 1 == gFineNodesPerDir[dim]) { + meshEdge[2 * dim + 1] = true; + endRate[dim] = startIndices[dim + 3] % coarseRate[dim]; + } + } else { // With uncoupled problem each rank might require a different endRate + meshEdge[2 * dim] = true; + meshEdge[2 * dim + 1] = true; + endRate[dim] = (lFineNodesPerDir[dim] - 1) % coarseRate[dim]; + } + if (endRate[dim] == 0) { + endRate[dim] = coarseRate[dim]; + } - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + // If uncoupled aggregation is used, offsets[dim] = 0, so nothing to do. 
+ if (coupled_) { + offsets[dim] = Teuchos::as(startIndices[dim]) % coarseRate[dim]; + if (offsets[dim] == 0) { + coarseNodeOffsets[dim] = 0; + } else if (startIndices[dim] + endRate[dim] == lFineNodesPerDir[dim]) { + coarseNodeOffsets[dim] = endRate[dim] - offsets[dim]; + } else { + coarseNodeOffsets[dim] = coarseRate[dim] - offsets[dim]; + } - if(coupled_) { - gNumFineNodes10 = gFineNodesPerDir[1]*gFineNodesPerDir[0]; - gNumFineNodes = gFineNodesPerDir[2]*gNumFineNodes10; - } else { - gNumFineNodes10 = Teuchos::OrdinalTraits::invalid(); - gNumFineNodes = Teuchos::OrdinalTraits::invalid(); - } - lNumFineNodes10 = lFineNodesPerDir[1]*lFineNodesPerDir[0]; - lNumFineNodes = lFineNodesPerDir[2]*lNumFineNodes10; - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - if(coupled_) { - if(startIndices[dim] == 0) { - meshEdge[2*dim] = true; + if (interpolationOrder_ == 0) { + int rem = startIndices[dim] % coarseRate[dim]; + if ((rem != 0) && (rem <= Teuchos::as(coarseRate[dim]) / 2.0)) { + ghostInterface[2 * dim] = true; } - if(startIndices[dim + 3] + 1 == gFineNodesPerDir[dim]) { - meshEdge[2*dim + 1] = true; - endRate[dim] = startIndices[dim + 3] % coarseRate[dim]; + rem = startIndices[dim + 3] % coarseRate[dim]; + // uncoupled by nature does not require ghosts nodes + if (coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && + (rem > Teuchos::as(coarseRate[dim]) / 2.0)) { + ghostInterface[2 * dim + 1] = true; } - } else { // With uncoupled problem each rank might require a different endRate - meshEdge[2*dim] = true; - meshEdge[2*dim + 1] = true; - endRate[dim] = (lFineNodesPerDir[dim] - 1) % coarseRate[dim]; - } - if(endRate[dim] == 0) {endRate[dim] = coarseRate[dim];} - // If uncoupled aggregation is used, offsets[dim] = 0, so nothing to do. 
- if(coupled_) { - offsets[dim] = Teuchos::as(startIndices[dim]) % coarseRate[dim]; - if(offsets[dim] == 0) { - coarseNodeOffsets[dim] = 0; - } else if(startIndices[dim] + endRate[dim] == lFineNodesPerDir[dim]) { - coarseNodeOffsets[dim] = endRate[dim] - offsets[dim]; - } else { - coarseNodeOffsets[dim] = coarseRate[dim] - offsets[dim]; + } else if (interpolationOrder_ == 1) { + if (coupled_ && (startIndices[dim] % coarseRate[dim] != 0 || + startIndices[dim] == gFineNodesPerDir[dim] - 1)) { + ghostInterface[2 * dim] = true; } - - if(interpolationOrder_ == 0) { - int rem = startIndices[dim] % coarseRate[dim]; - if( (rem != 0) && (rem <= Teuchos::as(coarseRate[dim]) / 2.0)) { - ghostInterface[2*dim] = true; - } - rem = startIndices[dim + 3] % coarseRate[dim]; - // uncoupled by nature does not require ghosts nodes - if(coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && - (rem > Teuchos::as(coarseRate[dim]) / 2.0)) { - ghostInterface[2*dim + 1] = true; - } - - } else if(interpolationOrder_ == 1) { - if(coupled_ && (startIndices[dim] % coarseRate[dim] != 0 || - startIndices[dim] == gFineNodesPerDir[dim]-1)) { - ghostInterface[2*dim] = true; - } - if(coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && - ((lFineNodesPerDir[dim] == 1) || (startIndices[dim + 3] % coarseRate[dim] != 0))) { - ghostInterface[2*dim+1] = true; - } + if (coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && + ((lFineNodesPerDir[dim] == 1) || (startIndices[dim + 3] % coarseRate[dim] != 0))) { + ghostInterface[2 * dim + 1] = true; } } - } else { // Default value for dim >= numDimensions - endRate[dim] = 1; } + } else { // Default value for dim >= numDimensions + endRate[dim] = 1; } + } - *out << "singleCoarsePoint? 
" << singleCoarsePoint_ << std::endl; - *out << "gFineNodesPerDir: " << gFineNodesPerDir << std::endl; - *out << "lFineNodesPerDir: " << lFineNodesPerDir << std::endl; - *out << "endRate: " << endRate << std::endl; - *out << "ghostInterface: {" << ghostInterface[0] << ", " << ghostInterface[1] << ", " - << ghostInterface[2] << ", " << ghostInterface[3] << ", " << ghostInterface[4] << ", " - << ghostInterface[5] << "}" << std::endl; - *out << "meshEdge: {" << meshEdge[0] << ", " << meshEdge[1] << ", " - << meshEdge[2] << ", " << meshEdge[3] << ", " << meshEdge[4] << ", " - << meshEdge[5] << "}" << std::endl; - *out << "startIndices: " << startIndices << std::endl; - *out << "offsets: " << offsets << std::endl; - *out << "coarseNodeOffsets: " << coarseNodeOffsets << std::endl; + *out << "singleCoarsePoint? " << singleCoarsePoint_ << std::endl; + *out << "gFineNodesPerDir: " << gFineNodesPerDir << std::endl; + *out << "lFineNodesPerDir: " << lFineNodesPerDir << std::endl; + *out << "endRate: " << endRate << std::endl; + *out << "ghostInterface: {" << ghostInterface[0] << ", " << ghostInterface[1] << ", " + << ghostInterface[2] << ", " << ghostInterface[3] << ", " << ghostInterface[4] << ", " + << ghostInterface[5] << "}" << std::endl; + *out << "meshEdge: {" << meshEdge[0] << ", " << meshEdge[1] << ", " + << meshEdge[2] << ", " << meshEdge[3] << ", " << meshEdge[4] << ", " + << meshEdge[5] << "}" << std::endl; + *out << "startIndices: " << startIndices << std::endl; + *out << "offsets: " << offsets << std::endl; + *out << "coarseNodeOffsets: " << coarseNodeOffsets << std::endl; - // Here one element can represent either the degenerate case of one node or the more general - // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with - // one node. This helps generating a 3D space from tensorial products... 
- // A good way to handle this would be to generalize the algorithm to take into account the - // discretization order used in each direction, at least in the FEM sense, since a 0 degree - // discretization will have a unique node per element. This way 1D discretization can be - // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre - // element in the z direction. - // !!! Operations below are aftecting both local and global values that have two !!! - // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. - // coarseRate, endRate and offsets are in the global basis, as well as all the variables - // starting with a g. - // !!! while the variables starting with an l are in the local basis. !!! - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - // Check whether the partition includes the "end" of the mesh which means that endRate - // will apply. Also make sure that endRate is not 0 which means that the mesh does not - // require a particular treatment at the boundaries. - if( meshEdge[2*dim + 1] ) { - lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] - endRate[dim] + offsets[dim] - 1) - / coarseRate[dim] + 1; - if(offsets[dim] == 0) {++lCoarseNodesPerDir[dim];} - // We might want to coarsening the direction - // into a single layer if there are not enough - // points left to form two aggregates - if(singleCoarsePoint_ && lFineNodesPerDir[dim] - 1 < coarseRate[dim]) { - lCoarseNodesPerDir[dim] =1; - } - } else { - lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] + offsets[dim] - 1) / coarseRate[dim]; - if(offsets[dim] == 0) {++lCoarseNodesPerDir[dim];} + // Here one element can represent either the degenerate case of one node or the more general + // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with + // one node. This helps generating a 3D space from tensorial products... 
+ // A good way to handle this would be to generalize the algorithm to take into account the + // discretization order used in each direction, at least in the FEM sense, since a 0 degree + // discretization will have a unique node per element. This way 1D discretization can be + // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre + // element in the z direction. + // !!! Operations below are aftecting both local and global values that have two !!! + // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. + // coarseRate, endRate and offsets are in the global basis, as well as all the variables + // starting with a g. + // !!! while the variables starting with an l are in the local basis. !!! + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + // Check whether the partition includes the "end" of the mesh which means that endRate + // will apply. Also make sure that endRate is not 0 which means that the mesh does not + // require a particular treatment at the boundaries. + if (meshEdge[2 * dim + 1]) { + lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] - endRate[dim] + offsets[dim] - 1) / coarseRate[dim] + 1; + if (offsets[dim] == 0) { + ++lCoarseNodesPerDir[dim]; } - - // The first branch of this if-statement will be used if the rank contains only one layer - // of nodes in direction i, that layer must also coincide with the boundary of the mesh - // and coarseRate[i] == endRate[i]... 
- if(interpolationOrder_ == 0) { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; - int rem = startIndices[dim] % coarseRate[dim]; - if(rem > (Teuchos::as(coarseRate[dim]) / 2.0) ) { - ++startGhostedCoarseNode[dim]; - } - } else { - if((startIndices[dim] == gFineNodesPerDir[dim] - 1) && - (startIndices[dim] % coarseRate[dim] == 0)) { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim] - 1; - } else { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; - } + // We might want to coarsening the direction + // into a single layer if there are not enough + // points left to form two aggregates + if (singleCoarsePoint_ && lFineNodesPerDir[dim] - 1 < coarseRate[dim]) { + lCoarseNodesPerDir[dim] = 1; } + } else { + lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] + offsets[dim] - 1) / coarseRate[dim]; + if (offsets[dim] == 0) { + ++lCoarseNodesPerDir[dim]; + } + } - // This array is passed to the RAPFactory and eventually becomes gFineNodePerDir on the next - // level. - gCoarseNodesPerDir[dim] = (gFineNodesPerDir[dim] - 1) / coarseRate[dim]; - if((gFineNodesPerDir[dim] - 1) % coarseRate[dim] == 0) { - ++gCoarseNodesPerDir[dim]; + // The first branch of this if-statement will be used if the rank contains only one layer + // of nodes in direction i, that layer must also coincide with the boundary of the mesh + // and coarseRate[i] == endRate[i]... 
+ if (interpolationOrder_ == 0) { + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; + int rem = startIndices[dim] % coarseRate[dim]; + if (rem > (Teuchos::as(coarseRate[dim]) / 2.0)) { + ++startGhostedCoarseNode[dim]; + } + } else { + if ((startIndices[dim] == gFineNodesPerDir[dim] - 1) && + (startIndices[dim] % coarseRate[dim] == 0)) { + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim] - 1; } else { - gCoarseNodesPerDir[dim] += 2; + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; } - } else { // Default value for dim >= numDimensions - // endRate[dim] = 1; - gCoarseNodesPerDir[dim] = 1; - lCoarseNodesPerDir[dim] = 1; - } // if (dim < numDimensions) - - // This would happen if the rank does not own any nodes but in that case a subcommunicator - // should be used so this should really not be a concern. - if(lFineNodesPerDir[dim] < 1) {lCoarseNodesPerDir[dim] = 0;} - ghostedNodesPerDir[dim] = lCoarseNodesPerDir[dim]; - // Check whether face *low needs ghost nodes - if(ghostInterface[2*dim]) {ghostedNodesPerDir[dim] += 1;} - // Check whether face *hi needs ghost nodes - if(ghostInterface[2*dim + 1]) {ghostedNodesPerDir[dim] += 1;} - } // Loop for dim=0:3 + } - // With uncoupled aggregation we need to communicate to compute the global number of coarse points - if(!coupled_) { - for(int dim = 0; dim < 3; ++dim) { - gCoarseNodesPerDir[dim] = -1; + // This array is passed to the RAPFactory and eventually becomes gFineNodePerDir on the next + // level. 
+ gCoarseNodesPerDir[dim] = (gFineNodesPerDir[dim] - 1) / coarseRate[dim]; + if ((gFineNodesPerDir[dim] - 1) % coarseRate[dim] == 0) { + ++gCoarseNodesPerDir[dim]; + } else { + gCoarseNodesPerDir[dim] += 2; } - } + } else { // Default value for dim >= numDimensions + // endRate[dim] = 1; + gCoarseNodesPerDir[dim] = 1; + lCoarseNodesPerDir[dim] = 1; + } // if (dim < numDimensions) - // Compute cummulative values - lNumCoarseNodes10 = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]; - lNumCoarseNodes = lNumCoarseNodes10*lCoarseNodesPerDir[2]; - numGhostedNodes10 = ghostedNodesPerDir[1]*ghostedNodesPerDir[0]; - numGhostedNodes = numGhostedNodes10*ghostedNodesPerDir[2]; - numGhostNodes = numGhostedNodes - lNumCoarseNodes; + // This would happen if the rank does not own any nodes but in that case a subcommunicator + // should be used so this should really not be a concern. + if (lFineNodesPerDir[dim] < 1) { + lCoarseNodesPerDir[dim] = 0; + } + ghostedNodesPerDir[dim] = lCoarseNodesPerDir[dim]; + // Check whether face *low needs ghost nodes + if (ghostInterface[2 * dim]) { + ghostedNodesPerDir[dim] += 1; + } + // Check whether face *hi needs ghost nodes + if (ghostInterface[2 * dim + 1]) { + ghostedNodesPerDir[dim] += 1; + } + } // Loop for dim=0:3 - *out << "lCoarseNodesPerDir: " << lCoarseNodesPerDir << std::endl; - *out << "gCoarseNodesPerDir: " << gCoarseNodesPerDir << std::endl; - *out << "ghostedNodesPerDir: " << ghostedNodesPerDir << std::endl; - *out << "lNumCoarseNodes=" << lNumCoarseNodes << std::endl; - *out << "numGhostedNodes=" << numGhostedNodes << std::endl; + // With uncoupled aggregation we need to communicate to compute the global number of coarse points + if (!coupled_) { + for (int dim = 0; dim < 3; ++dim) { + gCoarseNodesPerDir[dim] = -1; + } } -} //namespace MueLu + // Compute cummulative values + lNumCoarseNodes10 = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1]; + lNumCoarseNodes = lNumCoarseNodes10 * lCoarseNodesPerDir[2]; + numGhostedNodes10 = 
ghostedNodesPerDir[1] * ghostedNodesPerDir[0]; + numGhostedNodes = numGhostedNodes10 * ghostedNodesPerDir[2]; + numGhostNodes = numGhostedNodes - lNumCoarseNodes; + + *out << "lCoarseNodesPerDir: " << lCoarseNodesPerDir << std::endl; + *out << "gCoarseNodesPerDir: " << gCoarseNodesPerDir << std::endl; + *out << "ghostedNodesPerDir: " << ghostedNodesPerDir << std::endl; + *out << "lNumCoarseNodes=" << lNumCoarseNodes << std::endl; + *out << "numGhostedNodes=" << numGhostedNodes << std::endl; +} + +} // namespace MueLu #define MUELU_INDEXMANAGER_SHORT -#endif // MUELU_INDEXMANAGER_DEF_HPP +#endif // MUELU_INDEXMANAGER_DEF_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp index 113368ad6f50..06e72c2eb66d 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp @@ -53,7 +53,6 @@ #include "Teuchos_OrdinalTraits.hpp" - #include "MueLu_BaseClass.hpp" #include "MueLu_IndexManager_kokkos_fwd.hpp" @@ -74,112 +73,109 @@ namespace MueLu { spaces and it also provides utilites for coarsening. */ - template - class IndexManager_kokkos : public BaseClass { +template +class IndexManager_kokkos : public BaseClass { #undef MUELU_INDEXMANAGER_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using execution_space = typename Node::execution_space; - using memory_space = typename Node::memory_space; - using device_type = Kokkos::Device; - using intTupleView = typename Kokkos::View; - using LOTupleView = typename Kokkos::View; - - private: - - const int meshLayout = UNCOUPLED; - int myRank = -1; - int numDimensions; ///< Number of spacial dimensions in the problem - int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. 
- intTupleView coarseRate; ///< coarsening rate in each direction - intTupleView endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. - - LO lNumFineNodes; ///< local number of nodes. - LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. - LOTupleView lFineNodesPerDir; ///< local number of nodes per direction. - - LO numCoarseNodes; ///< local number of nodes remaining after coarsening. - LO numCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. - LOTupleView coarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. - - public: - - //! Default constructor, return empty object - IndexManager_kokkos() = default; - - //! Constructs for uncoupled meshes - IndexManager_kokkos(const int NumDimensions, - const int interpolationOrder, - const int MyRank, - const ArrayView LFineNodesPerDir, - const ArrayView CoarseRate); - - virtual ~IndexManager_kokkos() {} - - //! Common setup pattern used for all the different types of undelying mesh - void setupIM(const int NumDimensions, - const int interpolationOrder, - const ArrayView coarseRate, - const ArrayView LFineNodesPerDir); - - //! Sets basic parameters used to compute indices on the mesh. - //! This method requires you to have set this->coarseRate. 
- void computeMeshParameters(); - - int getNumDimensions() const {return numDimensions;} - - int getInterpolationOrder() const {return interpolationOrder_;} - - LO getNumLocalFineNodes() const {return lNumFineNodes;} - - LO getNumCoarseNodes() const {return numCoarseNodes;} - - KOKKOS_INLINE_FUNCTION - intTupleView getCoarseningRates() const {return coarseRate;} - - KOKKOS_INLINE_FUNCTION - intTupleView getCoarseningEndRates() const {return endRate;} - - KOKKOS_INLINE_FUNCTION - LOTupleView getLocalFineNodesPerDir() const {return lFineNodesPerDir;} - - KOKKOS_INLINE_FUNCTION - LOTupleView getCoarseNodesPerDir() const {return coarseNodesPerDir;} - - Array getCoarseNodesPerDirArray() const; - - KOKKOS_INLINE_FUNCTION - void getFineLID2FineTuple(const LO myLID, LO (&tuple)[3]) const { - LO tmp; - tuple[2] = myLID / (lFineNodesPerDir(1)*lFineNodesPerDir(0)); - tmp = myLID % (lFineNodesPerDir(1)*lFineNodesPerDir(0)); - tuple[1] = tmp / lFineNodesPerDir(0); - tuple[0] = tmp % lFineNodesPerDir(0); - } // getFineNodeLocalTuple - - KOKKOS_INLINE_FUNCTION - void getFineTuple2FineLID(const LO tuple[3], LO& myLID) const { - myLID = tuple[2]*lNumFineNodes10 + tuple[1]*lFineNodesPerDir[0] + tuple[0]; - } // getFineNodeLID - - KOKKOS_INLINE_FUNCTION - void getCoarseLID2CoarseTuple(const LO myLID, LO (&tuple)[3]) const { - LO tmp; - tuple[2] = myLID / numCoarseNodes10; - tmp = myLID % numCoarseNodes10; - tuple[1] = tmp / coarseNodesPerDir[0]; - tuple[0] = tmp % coarseNodesPerDir[0]; - } // getCoarseNodeLocalTuple - - KOKKOS_INLINE_FUNCTION - void getCoarseTuple2CoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*numCoarseNodes10 + j*coarseNodesPerDir[0] + i; - } // getCoarseNodeLID - - }; - -} //namespace MueLu + public: + using execution_space = typename Node::execution_space; + using memory_space = typename Node::memory_space; + using device_type = Kokkos::Device; + using intTupleView = typename Kokkos::View; + using LOTupleView = typename Kokkos::View; 
+ + private: + const int meshLayout = UNCOUPLED; + int myRank = -1; + int numDimensions; ///< Number of spacial dimensions in the problem + int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. + intTupleView coarseRate; ///< coarsening rate in each direction + intTupleView endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. + + LO lNumFineNodes; ///< local number of nodes. + LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. + LOTupleView lFineNodesPerDir; ///< local number of nodes per direction. + + LO numCoarseNodes; ///< local number of nodes remaining after coarsening. + LO numCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. + LOTupleView coarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. + + public: + //! Default constructor, return empty object + IndexManager_kokkos() = default; + + //! Constructs for uncoupled meshes + IndexManager_kokkos(const int NumDimensions, + const int interpolationOrder, + const int MyRank, + const ArrayView LFineNodesPerDir, + const ArrayView CoarseRate); + + virtual ~IndexManager_kokkos() {} + + //! Common setup pattern used for all the different types of undelying mesh + void setupIM(const int NumDimensions, + const int interpolationOrder, + const ArrayView coarseRate, + const ArrayView LFineNodesPerDir); + + //! Sets basic parameters used to compute indices on the mesh. + //! This method requires you to have set this->coarseRate. 
+ void computeMeshParameters(); + + int getNumDimensions() const { return numDimensions; } + + int getInterpolationOrder() const { return interpolationOrder_; } + + LO getNumLocalFineNodes() const { return lNumFineNodes; } + + LO getNumCoarseNodes() const { return numCoarseNodes; } + + KOKKOS_INLINE_FUNCTION + intTupleView getCoarseningRates() const { return coarseRate; } + + KOKKOS_INLINE_FUNCTION + intTupleView getCoarseningEndRates() const { return endRate; } + + KOKKOS_INLINE_FUNCTION + LOTupleView getLocalFineNodesPerDir() const { return lFineNodesPerDir; } + + KOKKOS_INLINE_FUNCTION + LOTupleView getCoarseNodesPerDir() const { return coarseNodesPerDir; } + + Array getCoarseNodesPerDirArray() const; + + KOKKOS_INLINE_FUNCTION + void getFineLID2FineTuple(const LO myLID, LO (&tuple)[3]) const { + LO tmp; + tuple[2] = myLID / (lFineNodesPerDir(1) * lFineNodesPerDir(0)); + tmp = myLID % (lFineNodesPerDir(1) * lFineNodesPerDir(0)); + tuple[1] = tmp / lFineNodesPerDir(0); + tuple[0] = tmp % lFineNodesPerDir(0); + } // getFineNodeLocalTuple + + KOKKOS_INLINE_FUNCTION + void getFineTuple2FineLID(const LO tuple[3], LO& myLID) const { + myLID = tuple[2] * lNumFineNodes10 + tuple[1] * lFineNodesPerDir[0] + tuple[0]; + } // getFineNodeLID + + KOKKOS_INLINE_FUNCTION + void getCoarseLID2CoarseTuple(const LO myLID, LO (&tuple)[3]) const { + LO tmp; + tuple[2] = myLID / numCoarseNodes10; + tmp = myLID % numCoarseNodes10; + tuple[1] = tmp / coarseNodesPerDir[0]; + tuple[0] = tmp % coarseNodesPerDir[0]; + } // getCoarseNodeLocalTuple + + KOKKOS_INLINE_FUNCTION + void getCoarseTuple2CoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * numCoarseNodes10 + j * coarseNodesPerDir[0] + i; + } // getCoarseNodeLID +}; + +} // namespace MueLu #define MUELU_INDEXMANAGER_KOKKOS_SHORT -#endif // MUELU_INDEXMANAGER_KOKKOS_DECL_HPP +#endif // MUELU_INDEXMANAGER_KOKKOS_DECL_HPP diff --git 
a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp index f31f94421d86..502f7c3667eb 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp @@ -63,171 +63,172 @@ namespace MueLu { - template - IndexManager_kokkos:: - IndexManager_kokkos(const int NumDimensions, - const int interpolationOrder, - const int MyRank, - const ArrayView LFineNodesPerDir, - const ArrayView CoarseRate) : - myRank(MyRank), coarseRate("coarsening rate"), endRate("endRate"), - lFineNodesPerDir("lFineNodesPerDir"), coarseNodesPerDir("lFineNodesPerDir") { - - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - setupIM(NumDimensions, interpolationOrder, CoarseRate, LFineNodesPerDir); +template +IndexManager_kokkos:: + IndexManager_kokkos(const int NumDimensions, + const int interpolationOrder, + const int MyRank, + const ArrayView LFineNodesPerDir, + const ArrayView CoarseRate) + : myRank(MyRank) + , coarseRate("coarsening rate") + , endRate("endRate") + , lFineNodesPerDir("lFineNodesPerDir") + , coarseNodesPerDir("lFineNodesPerDir") { + RCP out; + if (const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - *out << "Done setting up the IndexManager" << std::endl; + setupIM(NumDimensions, interpolationOrder, CoarseRate, LFineNodesPerDir); - computeMeshParameters(); + *out << "Done setting up the IndexManager" << 
std::endl; - *out << "Computed Mesh Parameters" << std::endl; + computeMeshParameters(); - } // IndexManager_kokkos Constructor + *out << "Computed Mesh Parameters" << std::endl; - template - void IndexManager_kokkos:: - setupIM(const int NumDimensions, const int interpolationOrder, - const ArrayView CoarseRate, const ArrayView LFineNodesPerDir) { +} // IndexManager_kokkos Constructor - numDimensions = NumDimensions; - interpolationOrder_ = interpolationOrder; +template +void IndexManager_kokkos:: + setupIM(const int NumDimensions, const int interpolationOrder, + const ArrayView CoarseRate, const ArrayView LFineNodesPerDir) { + numDimensions = NumDimensions; + interpolationOrder_ = interpolationOrder; - TEUCHOS_TEST_FOR_EXCEPTION((LFineNodesPerDir.size() != 3) - && (LFineNodesPerDir.size() != numDimensions), - Exceptions::RuntimeError, - "LFineNodesPerDir has to be of size 3 or of size numDimensions!"); + TEUCHOS_TEST_FOR_EXCEPTION((LFineNodesPerDir.size() != 3) && (LFineNodesPerDir.size() != numDimensions), + Exceptions::RuntimeError, + "LFineNodesPerDir has to be of size 3 or of size numDimensions!"); - typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); - Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); - typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); - Kokkos::deep_copy(coarseRate_h, coarseRate); + typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); + Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); + typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); + Kokkos::deep_copy(coarseRate_h, coarseRate); - // Load coarse rate, being careful about formating - // Also load lFineNodesPerDir - for(int dim = 0; dim < 3; ++dim) { - if(dim < getNumDimensions()) { - lFineNodesPerDir_h(dim) = LFineNodesPerDir[dim]; - if(CoarseRate.size() == 1) { - coarseRate_h(dim) = CoarseRate[0]; - } 
else if(CoarseRate.size() == getNumDimensions()) { - coarseRate_h(dim) = CoarseRate[dim]; - } - } else { - lFineNodesPerDir_h(dim) = 1; - coarseRate_h(dim) = 1; + // Load coarse rate, being careful about formating + // Also load lFineNodesPerDir + for (int dim = 0; dim < 3; ++dim) { + if (dim < getNumDimensions()) { + lFineNodesPerDir_h(dim) = LFineNodesPerDir[dim]; + if (CoarseRate.size() == 1) { + coarseRate_h(dim) = CoarseRate[0]; + } else if (CoarseRate.size() == getNumDimensions()) { + coarseRate_h(dim) = CoarseRate[dim]; } - } - - Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); - Kokkos::deep_copy(coarseRate, coarseRate_h); - - } // setupIM - - template - void IndexManager_kokkos::computeMeshParameters() { - - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + lFineNodesPerDir_h(dim) = 1; + coarseRate_h(dim) = 1; } + } - typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); - typename Kokkos::View::HostMirror endRate_h = Kokkos::create_mirror_view(endRate); - + Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); + Kokkos::deep_copy(coarseRate, coarseRate_h); - typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); - typename Kokkos::View::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); - Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); - Kokkos::deep_copy(coarseRate_h, coarseRate); +} // setupIM - lNumFineNodes10 = lFineNodesPerDir_h(1)*lFineNodesPerDir_h(0); - lNumFineNodes = lFineNodesPerDir_h(2)*lNumFineNodes10; - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - endRate_h(dim) = (lFineNodesPerDir_h(dim) - 1) % coarseRate_h(dim); - if(endRate_h(dim) == 0) {endRate_h(dim) = 
coarseRate_h(dim);} +template +void IndexManager_kokkos::computeMeshParameters() { + RCP out; + if (const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - } else { // Default value for dim >= numDimensions - endRate_h(dim) = 1; + typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); + typename Kokkos::View::HostMirror endRate_h = Kokkos::create_mirror_view(endRate); + + typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); + typename Kokkos::View::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); + Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); + Kokkos::deep_copy(coarseRate_h, coarseRate); + + lNumFineNodes10 = lFineNodesPerDir_h(1) * lFineNodesPerDir_h(0); + lNumFineNodes = lFineNodesPerDir_h(2) * lNumFineNodes10; + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + endRate_h(dim) = (lFineNodesPerDir_h(dim) - 1) % coarseRate_h(dim); + if (endRate_h(dim) == 0) { + endRate_h(dim) = coarseRate_h(dim); } - } - *out << "lFineNodesPerDir: {" << lFineNodesPerDir_h(0) << ", " << lFineNodesPerDir_h(1) << ", " - << lFineNodesPerDir_h(2) << "}" << std::endl; - *out << "endRate: {" << endRate_h(0) << ", " << endRate_h(1) << ", " - << endRate_h(2) << "}" << std::endl; - - // Here one element can represent either the degenerate case of one node or the more general - // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with - // one node. This helps generating a 3D space from tensorial products... 
- // A good way to handle this would be to generalize the algorithm to take into account the - // discretization order used in each direction, at least in the FEM sense, since a 0 degree - // discretization will have a unique node per element. This way 1D discretization can be - // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre - // element in the z direction. - // !!! Operations below are aftecting both local and global values that have two !!! - // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. - // coarseRate, endRate and offsets are in the global basis, as well as all the variables - // starting with a g. - // !!! while the variables starting with an l are in the local basis. !!! - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - // Check whether the partition includes the "end" of the mesh which means that endRate - // will apply. Also make sure that endRate is not 0 which means that the mesh does not - // require a particular treatment at the boundaries. - coarseNodesPerDir_h(dim) = (lFineNodesPerDir_h(dim) - endRate_h(dim) - 1) - / coarseRate_h(dim) + 2; - - } else { // Default value for dim >= numDimensions - // endRate[dim] = 1; - coarseNodesPerDir_h(dim) = 1; - } // if (dim < numDimensions) - - // This would happen if the rank does not own any nodes but in that case a subcommunicator - // should be used so this should really not be a concern. - if(lFineNodesPerDir_h(dim) < 1) {coarseNodesPerDir_h(dim) = 0;} - } // Loop for dim=0:3 - - // Compute cummulative values - numCoarseNodes10 = coarseNodesPerDir_h(0)*coarseNodesPerDir_h(1); - numCoarseNodes = numCoarseNodes10*coarseNodesPerDir_h(2); - - *out << "coarseNodesPerDir: {" << coarseNodesPerDir_h(0) << ", " - << coarseNodesPerDir_h(1) << ", " << coarseNodesPerDir_h(2) << "}" << std::endl; - *out << "numCoarseNodes=" << numCoarseNodes << std::endl; - - // Copy Host data to Device. 
- Kokkos::deep_copy(coarseRate, coarseRate_h); - Kokkos::deep_copy(endRate, endRate_h); - Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); - Kokkos::deep_copy(coarseNodesPerDir, coarseNodesPerDir_h); + } else { // Default value for dim >= numDimensions + endRate_h(dim) = 1; + } } - template - Array IndexManager_kokkos:: - getCoarseNodesPerDirArray() const { - typename LOTupleView::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); - Kokkos::deep_copy(coarseNodesPerDir_h, coarseNodesPerDir); - Array coarseNodesPerDirArray(3); - - for(int dim = 0; dim < 3; ++dim) { - coarseNodesPerDirArray[dim] = coarseNodesPerDir_h(dim); + *out << "lFineNodesPerDir: {" << lFineNodesPerDir_h(0) << ", " << lFineNodesPerDir_h(1) << ", " + << lFineNodesPerDir_h(2) << "}" << std::endl; + *out << "endRate: {" << endRate_h(0) << ", " << endRate_h(1) << ", " + << endRate_h(2) << "}" << std::endl; + + // Here one element can represent either the degenerate case of one node or the more general + // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with + // one node. This helps generating a 3D space from tensorial products... + // A good way to handle this would be to generalize the algorithm to take into account the + // discretization order used in each direction, at least in the FEM sense, since a 0 degree + // discretization will have a unique node per element. This way 1D discretization can be + // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre + // element in the z direction. + // !!! Operations below are aftecting both local and global values that have two !!! + // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. + // coarseRate, endRate and offsets are in the global basis, as well as all the variables + // starting with a g. + // !!! while the variables starting with an l are in the local basis. !!! 
+ for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + // Check whether the partition includes the "end" of the mesh which means that endRate + // will apply. Also make sure that endRate is not 0 which means that the mesh does not + // require a particular treatment at the boundaries. + coarseNodesPerDir_h(dim) = (lFineNodesPerDir_h(dim) - endRate_h(dim) - 1) / coarseRate_h(dim) + 2; + + } else { // Default value for dim >= numDimensions + // endRate[dim] = 1; + coarseNodesPerDir_h(dim) = 1; + } // if (dim < numDimensions) + + // This would happen if the rank does not own any nodes but in that case a subcommunicator + // should be used so this should really not be a concern. + if (lFineNodesPerDir_h(dim) < 1) { + coarseNodesPerDir_h(dim) = 0; } + } // Loop for dim=0:3 + + // Compute cummulative values + numCoarseNodes10 = coarseNodesPerDir_h(0) * coarseNodesPerDir_h(1); + numCoarseNodes = numCoarseNodes10 * coarseNodesPerDir_h(2); + + *out << "coarseNodesPerDir: {" << coarseNodesPerDir_h(0) << ", " + << coarseNodesPerDir_h(1) << ", " << coarseNodesPerDir_h(2) << "}" << std::endl; + *out << "numCoarseNodes=" << numCoarseNodes << std::endl; + + // Copy Host data to Device. 
+ Kokkos::deep_copy(coarseRate, coarseRate_h); + Kokkos::deep_copy(endRate, endRate_h); + Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); + Kokkos::deep_copy(coarseNodesPerDir, coarseNodesPerDir_h); +} + +template +Array IndexManager_kokkos:: + getCoarseNodesPerDirArray() const { + typename LOTupleView::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); + Kokkos::deep_copy(coarseNodesPerDir_h, coarseNodesPerDir); + Array coarseNodesPerDirArray(3); + + for (int dim = 0; dim < 3; ++dim) { + coarseNodesPerDirArray[dim] = coarseNodesPerDir_h(dim); + } - return coarseNodesPerDirArray; - } // getCoarseNodesData + return coarseNodesPerDirArray; +} // getCoarseNodesData -} //namespace MueLu +} // namespace MueLu #define MUELU_INDEXMANAGER_KOKKOS_SHORT -#endif // MUELU_INDEXMANAGER_DEF_KOKKOS_HPP +#endif // MUELU_INDEXMANAGER_DEF_KOKKOS_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp index c43758ee8352..dadd50c145c7 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP #define MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP - // #include // #include // #include @@ -102,60 +101,59 @@ namespace MueLu { | Aggregates | StructuredAggregationFactory | Container class with aggregation information. See also Aggregates. */ - template - class StructuredAggregationFactory : public SingleLevelFactoryBase { +template +class StructuredAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - StructuredAggregationFactory(); + public: + //! 
@name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~StructuredAggregationFactory() { } + //! Constructor. + StructuredAggregationFactory(); - RCP GetValidParameterList() const; + //! Destructor. + virtual ~StructuredAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Set/get methods. - //@{ - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + //@} - //@} + //! @name Set/get methods. + //@{ + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - //! Input - //@{ + //@} - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level& currentLevel) const; + //! @name Build methods. + //@{ - //@} + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - private: + //@} - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; + private: + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! 
if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class StructuredAggregationFactory +}; // class StructuredAggregationFactory -} +} // namespace MueLu #define MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp index 94685aefcef6..23e537a770ae 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp @@ -63,173 +63,172 @@ namespace MueLu { - template - StructuredAggregationFactory:: - StructuredAggregationFactory() : bDefinitionPhase_(true) - { } +template +StructuredAggregationFactory:: + StructuredAggregationFactory() + : bDefinitionPhase_(true) {} - template - RCP StructuredAggregationFactory:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP StructuredAggregationFactory:: + GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - - // general variables needed in StructuredAggregationFactory - SET_VALID_ENTRY("aggregation: mesh layout"); - SET_VALID_ENTRY("aggregation: mode"); - SET_VALID_ENTRY("aggregation: output type"); - SET_VALID_ENTRY("aggregation: coarsening rate"); - SET_VALID_ENTRY("aggregation: coarsening order"); -#undef SET_VALID_ENTRY - validParamList->set >("Graph", Teuchos::null, - "Graph of the 
matrix after amalgamation but without dropping."); - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("gNodesPerDim", Teuchos::null, - "Global number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Local number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("DofsPerNode", Teuchos::null, - "Generating factory for variable \'DofsPerNode\', usually the same as the \'Graph\' factory"); - validParamList->set("aggregation: single coarse point", false, - "Allows the aggreagtion process to reduce spacial dimensions to a single layer"); - - return validParamList; - } // GetValidParameterList() - - template - void StructuredAggregationFactory:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - ParameterList pL = GetParameterList(); - std::string coupling = pL.get("aggregation: mode"); - const bool coupled = (coupling == "coupled" ? 
true : false); - if(coupled) { - // Request the global number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "gNodesPerDim was not provided by the user on level0!"); - } + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + + // general variables needed in StructuredAggregationFactory + SET_VALID_ENTRY("aggregation: mesh layout"); + SET_VALID_ENTRY("aggregation: mode"); + SET_VALID_ENTRY("aggregation: output type"); + SET_VALID_ENTRY("aggregation: coarsening rate"); + SET_VALID_ENTRY("aggregation: coarsening order"); +#undef SET_VALID_ENTRY + validParamList->set >("Graph", Teuchos::null, + "Graph of the matrix after amalgamation but without dropping."); + validParamList->set >("numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("gNodesPerDim", Teuchos::null, + "Global number of nodes per spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("lNodesPerDim", Teuchos::null, + "Local number of nodes per spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("DofsPerNode", Teuchos::null, + "Generating factory for variable \'DofsPerNode\', usually the same as the \'Graph\' factory"); + validParamList->set("aggregation: single coarse point", false, + "Allows the aggreagtion process to reduce spacial dimensions to a single layer"); + + return validParamList; +} // GetValidParameterList() + +template +void StructuredAggregationFactory:: + 
DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + ParameterList pL = GetParameterList(); + std::string coupling = pL.get("aggregation: mode"); + const bool coupled = (coupling == "coupled" ? true : false); + if (coupled) { + // Request the global number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("gNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this); } else { - Input(currentLevel, "gNodesPerDim"); - } - } - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); + "gNodesPerDim was not provided by the user on level0!"); } } else { - Input(currentLevel, "numDimensions"); - Input(currentLevel, "lNodesPerDim"); + Input(currentLevel, "gNodesPerDim"); } - } // DeclareInput() - - template - void StructuredAggregationFactory:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + } - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + // Request the local number 
of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); } - - *out << "Entering structured aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const int numRanks = fineMap->getComm()->getSize(); - const GO minGlobalIndex = fineMap->getMinGlobalIndex(); - const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - std::string meshLayout = pL.get("aggregation: mesh layout"); - std::string coupling = pL.get("aggregation: mode"); - const bool coupled = (coupling == "coupled" ? true : false); - std::string outputType = pL.get("aggregation: output type"); - const bool outputAggregates = (outputType == "Aggregates" ? true : false); - const bool singleCoarsePoint = pL.get("aggregation: single coarse point"); - int numDimensions; - Array gFineNodesPerDir(3); - Array lFineNodesPerDir(3); - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. 
- numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - if(coupled) { - gFineNodesPerDir = currentLevel.Get >("gNodesPerDim", NoFactory::get()); - } + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - // On level > 0, data is provided directly by generating factories. - numDimensions = Get(currentLevel, "numDimensions"); - lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - if(coupled) { - gFineNodesPerDir = Get >(currentLevel, "gNodesPerDim"); - } + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } - - - // First make sure that input parameters are set logically based on dimension - for(int dim = 0; dim < 3; ++dim) { - if(dim >= numDimensions) { - gFineNodesPerDir[dim] = 1; - lFineNodesPerDir[dim] = 1; - } + } else { + Input(currentLevel, "numDimensions"); + Input(currentLevel, "lNodesPerDim"); + } +} // DeclareInput() + +template +void StructuredAggregationFactory:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + *out << "Entering structured aggregation" << std::endl; + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + // General problem informations are gathered from data stored in the problem matix. 
+ RCP graph = Get >(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const int numRanks = fineMap->getComm()->getSize(); + const GO minGlobalIndex = fineMap->getMinGlobalIndex(); + const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to + // obtain a nodeMap. + const int interpolationOrder = pL.get("aggregation: coarsening order"); + std::string meshLayout = pL.get("aggregation: mesh layout"); + std::string coupling = pL.get("aggregation: mode"); + const bool coupled = (coupling == "coupled" ? true : false); + std::string outputType = pL.get("aggregation: output type"); + const bool outputAggregates = (outputType == "Aggregates" ? true : false); + const bool singleCoarsePoint = pL.get("aggregation: single coarse point"); + int numDimensions; + Array gFineNodesPerDir(3); + Array lFineNodesPerDir(3); + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); + if (coupled) { + gFineNodesPerDir = currentLevel.Get >("gNodesPerDim", NoFactory::get()); } - - // Get the coarsening rate - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " - << std::endl; - throw e; + } else { + // On level > 0, data is provided directly by generating factories. 
+ numDimensions = Get(currentLevel, "numDimensions"); + lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); + if (coupled) { + gFineNodesPerDir = Get >(currentLevel, "gNodesPerDim"); } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); + } - // Now that we have extracted info from the level, create the IndexManager - RCP geoData; - if(!coupled) { - geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), + // First make sure that input parameters are set logically based on dimension + for (int dim = 0; dim < 3; ++dim) { + if (dim >= numDimensions) { + gFineNodesPerDir[dim] = 1; + lFineNodesPerDir[dim] = 1; + } + } + + // Get the coarsening rate + std::string coarseningRate = pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation& e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + // Now that we have extracted info from the level, create the IndexManager + RCP geoData; + if (!coupled) { + geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), coupled, numDimensions, interpolationOrder, @@ -239,23 +238,23 @@ namespace MueLu { lFineNodesPerDir, coarseRate, singleCoarsePoint)); - } else if(meshLayout == "Local Lexicographic") { - Array meshData; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - meshData = currentLevel.Get >("aggregation: mesh data", NoFactory::get()); - TEUCHOS_TEST_FOR_EXCEPTION(meshData.empty() == true, Exceptions::RuntimeError, - "The meshData array is empty, somehow the input for structured" - " aggregation are not captured correctly."); - } else { - // On level > 0, data is provided directly by generating factories. - meshData = Get >(currentLevel, "aggregation: mesh data"); - } - // Note, LBV Feb 5th 2018: - // I think that it might make sense to pass ghostInterface rather than interpolationOrder. - // For that I need to make sure that ghostInterface can be computed with minimal mesh - // knowledge outside of the IndexManager... - geoData = rcp(new MueLu::LocalLexicographicIndexManager(fineMap->getComm(), + } else if (meshLayout == "Local Lexicographic") { + Array meshData; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. 
+ meshData = currentLevel.Get >("aggregation: mesh data", NoFactory::get()); + TEUCHOS_TEST_FOR_EXCEPTION(meshData.empty() == true, Exceptions::RuntimeError, + "The meshData array is empty, somehow the input for structured" + " aggregation are not captured correctly."); + } else { + // On level > 0, data is provided directly by generating factories. + meshData = Get >(currentLevel, "aggregation: mesh data"); + } + // Note, LBV Feb 5th 2018: + // I think that it might make sense to pass ghostInterface rather than interpolationOrder. + // For that I need to make sure that ghostInterface can be computed with minimal mesh + // knowledge outside of the IndexManager... + geoData = rcp(new MueLu::LocalLexicographicIndexManager(fineMap->getComm(), coupled, numDimensions, interpolationOrder, @@ -265,12 +264,12 @@ namespace MueLu { lFineNodesPerDir, coarseRate, meshData)); - } else if(meshLayout == "Global Lexicographic") { - // Note, LBV Feb 5th 2018: - // I think that it might make sense to pass ghostInterface rather than interpolationOrder. - // For that I need to make sure that ghostInterface can be computed with minimal mesh - // knowledge outside of the IndexManager... - geoData = rcp(new MueLu::GlobalLexicographicIndexManager(fineMap->getComm(), + } else if (meshLayout == "Global Lexicographic") { + // Note, LBV Feb 5th 2018: + // I think that it might make sense to pass ghostInterface rather than interpolationOrder. + // For that I need to make sure that ghostInterface can be computed with minimal mesh + // knowledge outside of the IndexManager... 
+ geoData = rcp(new MueLu::GlobalLexicographicIndexManager(fineMap->getComm(), coupled, numDimensions, interpolationOrder, @@ -278,75 +277,71 @@ namespace MueLu { lFineNodesPerDir, coarseRate, minGlobalIndex)); - } - - - *out << "The index manager has now been built" << std::endl; - *out << "graph num nodes: " << fineMap->getLocalNumElements() - << ", structured aggregation num nodes: " << geoData->getNumLocalFineNodes() << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), + } + + *out << "The index manager has now been built" << std::endl; + *out << "graph num nodes: " << fineMap->getLocalNumElements() + << ", structured aggregation num nodes: " << geoData->getNumLocalFineNodes() << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() != static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + if (coupled) { + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getGlobalNumElements() != static_cast(geoData->getNumGlobalFineNodes()), Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - if(coupled) { - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getGlobalNumElements() - != static_cast(geoData->getNumGlobalFineNodes()), - Exceptions::RuntimeError, - "The global number of elements in the graph's map is not equal to " - "the number of nodes given by: gNodesPerDim!"); - } - - *out << "Compute coarse mesh data" << std::endl; - std::vector > coarseMeshData = geoData->getCoarseMeshData(); - - // Now we are ready for the big loop over the fine node that will assign each - // node on the fine grid to an aggregate and a processor. 
- RCP graphFact = GetFactory("Graph"); - RCP coarseCoordinatesFineMap, coarseCoordinatesMap; - RCP > + "The global number of elements in the graph's map is not equal to " + "the number of nodes given by: gNodesPerDim!"); + } + + *out << "Compute coarse mesh data" << std::endl; + std::vector > coarseMeshData = geoData->getCoarseMeshData(); + + // Now we are ready for the big loop over the fine node that will assign each + // node on the fine grid to an aggregate and a processor. + RCP graphFact = GetFactory("Graph"); + RCP coarseCoordinatesFineMap, coarseCoordinatesMap; + RCP > myStructuredAlgorithm = rcp(new AggregationStructuredAlgorithm(graphFact)); - if(interpolationOrder == 0 && outputAggregates){ - // Create aggregates for prolongation - *out << "Compute Aggregates" << std::endl; - RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); - aggregates->setObjectLabel("ST"); - aggregates->SetIndexManager(geoData); - aggregates->AggregatesCrossProcessors(coupled); - aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); - std::vector aggStat(geoData->getNumLocalFineNodes(), READY); - LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); - - myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, - numNonAggregatedNodes); - - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, - "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - GetOStream(Statistics1) << aggregates->description() << std::endl; - Set(currentLevel, "Aggregates", aggregates); - - } else { - // Create the graph of the prolongator - *out << "Compute CrsGraph" << std::endl; - RCP myGraph; - myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph, - coarseCoordinatesFineMap, coarseCoordinatesMap); - Set(currentLevel, "prolongatorGraph", myGraph); - } - - if(coupled) { - Set(currentLevel, "gCoarseNodesPerDim", geoData->getGlobalCoarseNodesPerDir()); - } - Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); - Set(currentLevel, "coarseCoordinatesFineMap", coarseCoordinatesFineMap); - Set(currentLevel, "coarseCoordinatesMap", coarseCoordinatesMap); - Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); - Set(currentLevel, "numDimensions", numDimensions); - - } // Build() -} //namespace MueLu - + if (interpolationOrder == 0 && outputAggregates) { + // Create aggregates for prolongation + *out << "Compute Aggregates" << std::endl; + RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); + aggregates->setObjectLabel("ST"); + aggregates->SetIndexManager(geoData); + aggregates->AggregatesCrossProcessors(coupled); + aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); + std::vector aggStat(geoData->getNumLocalFineNodes(), READY); + LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); + + myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + GetOStream(Statistics1) << aggregates->description() << std::endl; + Set(currentLevel, "Aggregates", aggregates); + + } else { + // Create the graph of the prolongator + *out << "Compute CrsGraph" << std::endl; + RCP myGraph; + myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph, + coarseCoordinatesFineMap, coarseCoordinatesMap); + Set(currentLevel, "prolongatorGraph", myGraph); + } + + if (coupled) { + Set(currentLevel, "gCoarseNodesPerDim", geoData->getGlobalCoarseNodesPerDir()); + } + Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); + Set(currentLevel, "coarseCoordinatesFineMap", coarseCoordinatesFineMap); + Set(currentLevel, "coarseCoordinatesMap", coarseCoordinatesMap); + Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); + Set(currentLevel, "numDimensions", numDimensions); + +} // Build() +} // namespace MueLu #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp index 88724397a340..b966b0043c9a 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp @@ -103,62 +103,61 @@ namespace MueLu { | CrsGraph | StructuredAggregationFactory_kokkos | CrsGraph of the prolongator */ - template - class StructuredAggregationFactory_kokkos : public SingleLevelFactoryBase { +template +class StructuredAggregationFactory_kokkos : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! 
Constructor. - StructuredAggregationFactory_kokkos(); + //! Constructor. + StructuredAggregationFactory_kokkos(); - //! Destructor. - virtual ~StructuredAggregationFactory_kokkos() { } + //! Destructor. + virtual ~StructuredAggregationFactory_kokkos() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Set/get methods. - //@{ - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + //! @name Set/get methods. + //@{ + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! @brief Build aggregates. */ - void Build(Level& currentLevel) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + //@} - private: + private: + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! 
if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; +}; // class StructuredAggregationFactory - }; // class StructuredAggregationFactory - -} +} // namespace MueLu #define MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT -#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP +#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp index 1067efc3e08d..5d1911c775e8 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp @@ -65,193 +65,192 @@ namespace MueLu { - template - StructuredAggregationFactory_kokkos:: - StructuredAggregationFactory_kokkos() : bDefinitionPhase_(true) { } +template +StructuredAggregationFactory_kokkos:: + StructuredAggregationFactory_kokkos() + : bDefinitionPhase_(true) {} - template - RCP StructuredAggregationFactory_kokkos:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP StructuredAggregationFactory_kokkos:: + GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); -#undef SET_VALID_ENTRY - - // general variables needed in StructuredAggregationFactory - validParamList->set ("aggregation: output type", "Aggregates", - "Type of object holding the aggregation data: Aggregtes or CrsGraph"); - 
validParamList->set ("aggregation: coarsening rate", "{3}", - "Coarsening rate per spatial dimensions"); - validParamList->set ("aggregation: coarsening order", 0, - "The interpolation order used to construct grid transfer operators based off these aggregates."); - validParamList->set >("Graph", Teuchos::null, - "Graph of the matrix after amalgamation but without dropping."); - validParamList->set >("DofsPerNode", Teuchos::null, - "Number of degrees of freedom per mesh node, provided by the coalsce drop factory."); - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); - - return validParamList; - } // GetValidParameterList() - - template - void StructuredAggregationFactory_kokkos:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - } + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + 
SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); +#undef SET_VALID_ENTRY + + // general variables needed in StructuredAggregationFactory + validParamList->set("aggregation: output type", "Aggregates", + "Type of object holding the aggregation data: Aggregtes or CrsGraph"); + validParamList->set("aggregation: coarsening rate", "{3}", + "Coarsening rate per spatial dimensions"); + validParamList->set("aggregation: coarsening order", 0, + "The interpolation order used to construct grid transfer operators based off these aggregates."); + validParamList->set >("Graph", Teuchos::null, + "Graph of the matrix after amalgamation but without dropping."); + validParamList->set >("DofsPerNode", Teuchos::null, + "Number of degrees of freedom per mesh node, provided by the coalsce drop factory."); + validParamList->set >("numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); + + return validParamList; +} // GetValidParameterList() + +template +void StructuredAggregationFactory_kokkos:: + DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); } else { - Input(currentLevel, "lNodesPerDim"); - Input(currentLevel, "numDimensions"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); } - } // DeclareInput() - - template - void StructuredAggregationFactory_kokkos:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP out; - if(const char* dbg = 
std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - using device_type = typename LWGraph_kokkos::local_graph_type::device_type; - using execution_space = typename LWGraph_kokkos::local_graph_type::device_type::execution_space; - using memory_space = typename LWGraph_kokkos::local_graph_type::device_type::memory_space; - - *out << "Entering structured aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - std::string outputType = pL.get("aggregation: output type"); - const bool outputAggregates = (outputType == "Aggregates" ? true : false); - Array lFineNodesPerDir(3); - int numDimensions; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - // On level > 0, data is provided directly by generating factories. 
- lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - numDimensions = Get(currentLevel, "numDimensions"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } - - - // First make sure that input parameters are set logically based on dimension - for(int dim = 0; dim < 3; ++dim) { - if(dim >= numDimensions) { - lFineNodesPerDir[dim] = 1; - } + } else { + Input(currentLevel, "lNodesPerDim"); + Input(currentLevel, "numDimensions"); + } +} // DeclareInput() + +template +void StructuredAggregationFactory_kokkos:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + using device_type = typename LWGraph_kokkos::local_graph_type::device_type; + using execution_space = typename LWGraph_kokkos::local_graph_type::device_type::execution_space; + using memory_space = typename LWGraph_kokkos::local_graph_type::device_type::memory_space; + + *out << "Entering structured aggregation" << std::endl; + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + // General problem informations are gathered from data stored in the problem matix. + RCP graph = Get >(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to + // obtain a nodeMap. 
+ const int interpolationOrder = pL.get("aggregation: coarsening order"); + std::string outputType = pL.get("aggregation: output type"); + const bool outputAggregates = (outputType == "Aggregates" ? true : false); + Array lFineNodesPerDir(3); + int numDimensions; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); + numDimensions = Get(currentLevel, "numDimensions"); + } + + // First make sure that input parameters are set logically based on dimension + for (int dim = 0; dim < 3; ++dim) { + if (dim >= numDimensions) { + lFineNodesPerDir[dim] = 1; } - - // Get the coarsening rate - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " - << std::endl; - throw e; - } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); - - // Now that we have extracted info from the level, create the IndexManager - RCP geoData = rcp(new IndexManager_kokkos(numDimensions, - interpolationOrder, myRank, - lFineNodesPerDir, - coarseRate)); - - *out << "The index manager has now been built" << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - - // Now we are ready for the big loop over the fine node that will assign each - // node on the fine grid to an aggregate and a processor. - RCP myStructuredAlgorithm - = rcp(new AggregationStructuredAlgorithm_kokkos()); - - if(interpolationOrder == 0 && outputAggregates){ - RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); - aggregates->setObjectLabel("ST"); - aggregates->SetIndexManagerKokkos(geoData); - aggregates->AggregatesCrossProcessors(false); - aggregates->SetNumAggregates(geoData->getNumCoarseNodes()); - - LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); - Kokkos::View aggStat("aggStat", numNonAggregatedNodes); - Kokkos::parallel_for("StructuredAggregation: initialize aggStat", - Kokkos::RangePolicy(0, numNonAggregatedNodes), - KOKKOS_LAMBDA(const LO nodeIdx) {aggStat(nodeIdx) = READY;}); - - myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, - numNonAggregatedNodes); - - *out << "numNonAggregatedNodes: " << numNonAggregatedNodes << std::endl; - - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, - "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - GetOStream(Statistics1) << aggregates->description() << std::endl; - Set(currentLevel, "Aggregates", aggregates); - - } else { - // Create Coarse Data - RCP myGraph; - myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph); - Set(currentLevel, "prolongatorGraph", myGraph); - } - - Set(currentLevel, "lCoarseNodesPerDim", geoData->getCoarseNodesPerDirArray()); - Set(currentLevel, "indexManager", geoData); - Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); - Set(currentLevel, "numDimensions", numDimensions); - - } // Build() - -} //namespace MueLu + } + + // Get the coarsening rate + std::string coarseningRate = pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation& e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + // Now that we have extracted info from the level, create the IndexManager + RCP geoData = rcp(new IndexManager_kokkos(numDimensions, + interpolationOrder, myRank, + lFineNodesPerDir, + coarseRate)); + + *out << "The index manager has now been built" << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() != static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + + // Now we are ready for the big loop over the fine node that will assign each + // node on the fine grid to an aggregate and a processor. + RCP myStructuredAlgorithm = rcp(new AggregationStructuredAlgorithm_kokkos()); + + if (interpolationOrder == 0 && outputAggregates) { + RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); + aggregates->setObjectLabel("ST"); + aggregates->SetIndexManagerKokkos(geoData); + aggregates->AggregatesCrossProcessors(false); + aggregates->SetNumAggregates(geoData->getNumCoarseNodes()); + + LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); + Kokkos::View aggStat("aggStat", numNonAggregatedNodes); + Kokkos::parallel_for( + "StructuredAggregation: initialize aggStat", + Kokkos::RangePolicy(0, numNonAggregatedNodes), + KOKKOS_LAMBDA(const LO nodeIdx) { aggStat(nodeIdx) = READY; }); + + myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + + *out << "numNonAggregatedNodes: " << numNonAggregatedNodes << std::endl; + + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + GetOStream(Statistics1) << aggregates->description() << std::endl; + Set(currentLevel, "Aggregates", aggregates); + + } else { + // Create Coarse Data + RCP myGraph; + myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph); + Set(currentLevel, "prolongatorGraph", myGraph); + } + + Set(currentLevel, "lCoarseNodesPerDim", geoData->getCoarseNodesPerDirArray()); + Set(currentLevel, "indexManager", geoData); + Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); + Set(currentLevel, "numDimensions", numDimensions); + +} // Build() + +} // namespace MueLu #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_DEF_HPP */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp index 9488bfe3dd72..3f5cda1e3962 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp @@ -77,69 +77,67 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class GlobalLexicographicIndexManager : public IndexManager { +template +class GlobalLexicographicIndexManager : public IndexManager { #undef MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + GlobalLexicographicIndexManager(); - GlobalLexicographicIndexManager(); + GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, const Array CoarseRate, + const GO MinGlobalIndex); - GlobalLexicographicIndexManager(const RCP< const Teuchos::Comm > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, const Array CoarseRate, - const GO MinGlobalIndex); + virtual ~GlobalLexicographicIndexManager() {} - virtual ~GlobalLexicographicIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + std::vector > getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO& 
i, LO& j, LO& k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + private: +}; - private: - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_SHORT -#endif // MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_DECL_HPP +#endif // MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_DECL_HPP diff --git 
a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp index 8211e1a461bd..20bc4e0e7419 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp @@ -51,341 +51,330 @@ namespace MueLu { - template - GlobalLexicographicIndexManager:: - GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, const Array CoarseRate, - const GO MinGlobalIndex) : - IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) { - - // Load coarse rate, being careful about formating. - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +GlobalLexicographicIndexManager:: + GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, const Array CoarseRate, + const GO MinGlobalIndex) + : IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) { + // Load coarse rate, being careful about formating. 
+ for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - { - GO tmp = 0; - this->startIndices[2]= MinGlobalIndex / (this->gFineNodesPerDir[1]*this->gFineNodesPerDir[0]); - tmp = MinGlobalIndex % (this->gFineNodesPerDir[1]*this->gFineNodesPerDir[0]); - this->startIndices[1]= tmp / this->gFineNodesPerDir[0]; - this->startIndices[0]= tmp % this->gFineNodesPerDir[0]; + { + GO tmp = 0; + this->startIndices[2] = MinGlobalIndex / (this->gFineNodesPerDir[1] * this->gFineNodesPerDir[0]); + tmp = MinGlobalIndex % (this->gFineNodesPerDir[1] * this->gFineNodesPerDir[0]); + this->startIndices[1] = tmp / this->gFineNodesPerDir[0]; + this->startIndices[0] = tmp % this->gFineNodesPerDir[0]; - for(int dim = 0; dim < 3; ++dim) { - this->startIndices[dim + 3] = this->startIndices[dim] + this->lFineNodesPerDir[dim] - 1; - } + for (int dim = 0; dim < 3; ++dim) { + this->startIndices[dim + 3] = this->startIndices[dim] + this->lFineNodesPerDir[dim] - 1; } - - this->computeMeshParameters(); - computeGlobalCoarseParameters(); - - } - - template - void GlobalLexicographicIndexManager:: - computeGlobalCoarseParameters() { - this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0]*this->gCoarseNodesPerDir[1]; - this->gNumCoarseNodes = this->gNumCoarseNodes10*this->gCoarseNodesPerDir[2]; } - template - void GlobalLexicographicIndexManager:: - getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, Array& ghostedNodeCoarsePIDs, Array&ghostedNodeCoarseGIDs) const { - - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); - - // Find the GIDs, LIDs and PIDs of the coarse points on the fine mesh and coarse - // mesh as this 
data will be used to fill vertex2AggId and procWinner vectors. - Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes), + this->computeMeshParameters(); + computeGlobalCoarseParameters(); +} + +template +void GlobalLexicographicIndexManager:: + computeGlobalCoarseParameters() { + this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0] * this->gCoarseNodesPerDir[1]; + this->gNumCoarseNodes = this->gNumCoarseNodes10 * this->gCoarseNodesPerDir[2]; +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, Array& ghostedNodeCoarsePIDs, Array& ghostedNodeCoarseGIDs) const { + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); + + // Find the GIDs, LIDs and PIDs of the coarse points on the fine mesh and coarse + // mesh as this data will be used to fill vertex2AggId and procWinner vectors. + Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes), lCoarseNodeFineGIDs(this->lNumCoarseNodes); - Array ghostedCoarseNodeFineGIDs(this->numGhostedNodes); - Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3), ijk(3); - LO currentIndex = -1, currentCoarseIndex = -1; - for(ijk[2] = 0; ijk[2] < this->ghostedNodesPerDir[2]; ++ijk[2]) { - for(ijk[1] = 0; ijk[1] < this->ghostedNodesPerDir[1]; ++ijk[1]) { - for(ijk[0] = 0; ijk[0] < this->ghostedNodesPerDir[0]; ++ijk[0]) { - currentIndex = ijk[2]*this->numGhostedNodes10 + ijk[1]*this->ghostedNodesPerDir[0] + ijk[0]; - ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + ijk[0]; - ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + ijk[1]; - ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + ijk[2]; - GO myCoarseGID = ghostedCoarseNodeCoarseIndices[0] - + ghostedCoarseNodeCoarseIndices[1]*this->gCoarseNodesPerDir[0] - + 
ghostedCoarseNodeCoarseIndices[2]*this->gNumCoarseNodes10; - ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; - GO myGID = 0, factor[3] = {}; - factor[2] = this->gNumFineNodes10; - factor[1] = this->gFineNodesPerDir[0]; - factor[0] = 1; - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(this->startIndices[dim] - this->offsets[dim] + ijk[dim]*this->coarseRate[dim] - < this->gFineNodesPerDir[dim] - 1) { - myGID += (this->startIndices[dim] - this->offsets[dim] - + ijk[dim]*this->coarseRate[dim])*factor[dim]; - } else { - myGID += (this->startIndices[dim] - this->offsets[dim] + (ijk[dim] - 1) - *this->coarseRate[dim] + this->endRate[dim])*factor[dim]; - } + Array ghostedCoarseNodeFineGIDs(this->numGhostedNodes); + Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3), ijk(3); + LO currentIndex = -1, currentCoarseIndex = -1; + for (ijk[2] = 0; ijk[2] < this->ghostedNodesPerDir[2]; ++ijk[2]) { + for (ijk[1] = 0; ijk[1] < this->ghostedNodesPerDir[1]; ++ijk[1]) { + for (ijk[0] = 0; ijk[0] < this->ghostedNodesPerDir[0]; ++ijk[0]) { + currentIndex = ijk[2] * this->numGhostedNodes10 + ijk[1] * this->ghostedNodesPerDir[0] + ijk[0]; + ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + ijk[0]; + ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + ijk[1]; + ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + ijk[2]; + GO myCoarseGID = ghostedCoarseNodeCoarseIndices[0] + ghostedCoarseNodeCoarseIndices[1] * this->gCoarseNodesPerDir[0] + ghostedCoarseNodeCoarseIndices[2] * this->gNumCoarseNodes10; + ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; + GO myGID = 0, factor[3] = {}; + factor[2] = this->gNumFineNodes10; + factor[1] = this->gFineNodesPerDir[0]; + factor[0] = 1; + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (this->startIndices[dim] - this->offsets[dim] + ijk[dim] * this->coarseRate[dim] < this->gFineNodesPerDir[dim] - 1) { + myGID += 
(this->startIndices[dim] - this->offsets[dim] + ijk[dim] * this->coarseRate[dim]) * factor[dim]; + } else { + myGID += (this->startIndices[dim] - this->offsets[dim] + (ijk[dim] - 1) * this->coarseRate[dim] + this->endRate[dim]) * factor[dim]; } } - // lbv 02-08-2018: - // This check is simplistic and should be replaced by a condition that checks - // if the local tuple of the current index is wihin the range of local nodes - // or not in the range of ghosted nodes. - if((!this->ghostInterface[0] || ijk[0] != 0) && - (!this->ghostInterface[2] || ijk[1] != 0) && - (!this->ghostInterface[4] || ijk[2] != 0) && - (!this->ghostInterface[1] || ijk[0] != this->ghostedNodesPerDir[0] - 1) && - (!this->ghostInterface[3] || ijk[1] != this->ghostedNodesPerDir[1] - 1) && - (!this->ghostInterface[5] || ijk[2] != this->ghostedNodesPerDir[2] - 1)) { - - // this->getGhostedNodeFineLID(ijk[0], ijk[1], ijk[2], coarseNodeFineLID); - if(this->interpolationOrder_ == 0) { - currentCoarseIndex = 0; - if(this->ghostInterface[4]) { - currentCoarseIndex += (ijk[2] - 1)*this->lNumCoarseNodes10; - } else { - currentCoarseIndex += ijk[2]*this->lNumCoarseNodes10; - } - if(this->ghostInterface[2]) { - currentCoarseIndex += (ijk[1] - 1)*this->getLocalCoarseNodesInDir(0); - } else { - currentCoarseIndex += ijk[1]*this->getLocalCoarseNodesInDir(0); - } - if(this->ghostInterface[0]) { - currentCoarseIndex += ijk[0] - 1; - } else { - currentCoarseIndex += ijk[0]; - } + } + // lbv 02-08-2018: + // This check is simplistic and should be replaced by a condition that checks + // if the local tuple of the current index is wihin the range of local nodes + // or not in the range of ghosted nodes. 
+ if ((!this->ghostInterface[0] || ijk[0] != 0) && + (!this->ghostInterface[2] || ijk[1] != 0) && + (!this->ghostInterface[4] || ijk[2] != 0) && + (!this->ghostInterface[1] || ijk[0] != this->ghostedNodesPerDir[0] - 1) && + (!this->ghostInterface[3] || ijk[1] != this->ghostedNodesPerDir[1] - 1) && + (!this->ghostInterface[5] || ijk[2] != this->ghostedNodesPerDir[2] - 1)) { + // this->getGhostedNodeFineLID(ijk[0], ijk[1], ijk[2], coarseNodeFineLID); + if (this->interpolationOrder_ == 0) { + currentCoarseIndex = 0; + if (this->ghostInterface[4]) { + currentCoarseIndex += (ijk[2] - 1) * this->lNumCoarseNodes10; } else { - this->getGhostedNodeCoarseLID(ijk[0], ijk[1], ijk[2], currentCoarseIndex); + currentCoarseIndex += ijk[2] * this->lNumCoarseNodes10; } - - lCoarseNodeCoarseGIDs[currentCoarseIndex] = myCoarseGID; - lCoarseNodeFineGIDs[currentCoarseIndex] = myGID; + if (this->ghostInterface[2]) { + currentCoarseIndex += (ijk[1] - 1) * this->getLocalCoarseNodesInDir(0); + } else { + currentCoarseIndex += ijk[1] * this->getLocalCoarseNodesInDir(0); + } + if (this->ghostInterface[0]) { + currentCoarseIndex += ijk[0] - 1; + } else { + currentCoarseIndex += ijk[0]; + } + } else { + this->getGhostedNodeCoarseLID(ijk[0], ijk[1], ijk[2], currentCoarseIndex); } - ghostedCoarseNodeFineGIDs[currentIndex] = myGID; - } - } - } - - RCP coarseMap = Xpetra::MapFactory::Build (fineMap->lib(), - this->gNumCoarseNodes, - lCoarseNodeCoarseGIDs(), - fineMap->getIndexBase(), - fineMap->getComm()); - - coarseMap->getRemoteIndexList(ghostedNodeCoarseGIDs(), - ghostedNodeCoarsePIDs(), - ghostedNodeCoarseLIDs()); - - } // End getGhostedMeshData - - template - void GlobalLexicographicIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient storage space for outputs - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - 
// Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - Array coarseStartIndices(3); - GO tmp; - for(int dim = 0; dim < 3; ++dim) { - coarseStartIndices[dim] = this->startIndices[dim] / this->coarseRate[dim]; - tmp = this->startIndices[dim] % this->coarseRate[dim]; - if(tmp > 0) {++coarseStartIndices[dim];} - } - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - Array lCoarseIndices(3); - Array gCoarseIndices(3); - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - this->getCoarseNodeLocalTuple(coarseLID, - lCoarseIndices[0], - lCoarseIndices[1], - lCoarseIndices[2]); - getCoarseNodeFineLID(lCoarseIndices[0], lCoarseIndices[1], lCoarseIndices[2], fineLID); - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - - // Get Coarse Global IJK - for(int dim=0; dim<3; dim++) { - gCoarseIndices[dim] = coarseStartIndices[dim] + lCoarseIndices[dim]; + lCoarseNodeCoarseGIDs[currentCoarseIndex] = myCoarseGID; + lCoarseNodeFineGIDs[currentCoarseIndex] = myGID; + } + ghostedCoarseNodeFineGIDs[currentIndex] = myGID; } - getCoarseNodeGID(gCoarseIndices[0], - gCoarseIndices[1], - gCoarseIndices[2], - coarseNodeCoarseGIDs[coarseLID] ); - } - - } - - template - std::vector > GlobalLexicographicIndexManager:: - getCoarseMeshData() const { - std::vector > coarseMeshData; - return coarseMeshData; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { - GO tmp; - k = myGID / this->gNumFineNodes10; - tmp = myGID % this->gNumFineNodes10; - j = tmp / this->gFineNodesPerDir[0]; - i = tmp % this->gFineNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % 
this->lFineNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { - myGID = k*this->gNumFineNodes10 + j*this->gFineNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->lNumFineNodes10 + j*this->lFineNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { - GO tmp; - k = myGID / this->gNumCoarseNodes10; - tmp = myGID % this->gNumCoarseNodes10; - j = tmp / this->gCoarseNodesPerDir[0]; - i = tmp % this->gCoarseNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { - myGID = k*this->gNumCoarseNodes10 + j*this->gCoarseNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->lNumCoarseNodes10 + j*this->lCoarseNodesPerDir[0] + i; } - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + 
j*this->ghostedNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - // Assumptions: (i,j,k) is a tuple on the coarse mesh - // myLID is the corresponding local ID on the fine mesh - const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; - const LO indices[3] = {i, j, k}; - - myLID = 0; - for(int dim = 0; dim < 3; ++dim) { - if((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2*dim + 1]) { - // We are dealing with the last node on the mesh in direction dim - // so we can simply use the number of nodes on the fine mesh in that direction - myLID += (this->getLocalFineNodesInDir(dim) - 1)*multiplier[dim]; - } else { - myLID += (indices[dim]*this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) - *multiplier[dim]; - } + RCP coarseMap = Xpetra::MapFactory::Build(fineMap->lib(), + this->gNumCoarseNodes, + lCoarseNodeCoarseGIDs(), + fineMap->getIndexBase(), + fineMap->getComm()); + + coarseMap->getRemoteIndexList(ghostedNodeCoarseGIDs(), + ghostedNodeCoarsePIDs(), + ghostedNodeCoarseLIDs()); + +} // End getGhostedMeshData + +template +void GlobalLexicographicIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const { + // Allocate sufficient storage space for outputs + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + Array coarseStartIndices(3); + GO tmp; + for (int dim = 0; dim < 3; ++dim) { + coarseStartIndices[dim] = this->startIndices[dim] / this->coarseRate[dim]; + tmp = this->startIndices[dim] % this->coarseRate[dim]; + if (tmp > 0) { + ++coarseStartIndices[dim]; } } - template - void GlobalLexicographicIndexManager:: - 
getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); - LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); - LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); - myLID = 0; - if(ktmp*this->coarseRate[2] < this->lFineNodesPerDir[2]) { - myLID += ktmp*this->coarseRate[2]*this->lNumCoarseNodes10; - } else { - myLID += (this->lFineNodesPerDir[2] - 1)*this->lNumCoarseNodes10; - } - - if(jtmp*this->coarseRate[1] < this->lFineNodesPerDir[1]) { - myLID += jtmp*this->coarseRate[1]*this->lFineNodesPerDir[0]; - } else { - myLID += (this->lFineNodesPerDir[1] - 1)*this->lFineNodesPerDir[1]; + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + Array lCoarseIndices(3); + Array gCoarseIndices(3); + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { + this->getCoarseNodeLocalTuple(coarseLID, + lCoarseIndices[0], + lCoarseIndices[1], + lCoarseIndices[2]); + getCoarseNodeFineLID(lCoarseIndices[0], lCoarseIndices[1], lCoarseIndices[2], fineLID); + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; + + // Get Coarse Global IJK + for (int dim = 0; dim < 3; dim++) { + gCoarseIndices[dim] = coarseStartIndices[dim] + lCoarseIndices[dim]; } - - if(itmp*this->coarseRate[0] < this->lFineNodesPerDir[0]) { - myLID += itmp*this->coarseRate[0]; + getCoarseNodeGID(gCoarseIndices[0], + gCoarseIndices[1], + gCoarseIndices[2], + coarseNodeCoarseGIDs[coarseLID]); + } +} + +template +std::vector > GlobalLexicographicIndexManager:: + getCoarseMeshData() const { + std::vector > coarseMeshData; + return coarseMeshData; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { + GO tmp; + k = myGID / this->gNumFineNodes10; + tmp = myGID % this->gNumFineNodes10; + j = tmp / this->gFineNodesPerDir[0]; + i = tmp % this->gFineNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + 
getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { + myGID = k * this->gNumFineNodes10 + j * this->gFineNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->lNumFineNodes10 + j * this->lFineNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { + GO tmp; + k = myGID / this->gNumCoarseNodes10; + tmp = myGID % this->gNumCoarseNodes10; + j = tmp / this->gCoarseNodesPerDir[0]; + i = tmp % this->gCoarseNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { + myGID = k * this->gNumCoarseNodes10 + j * this->gCoarseNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->lNumCoarseNodes10 + j * 
this->lCoarseNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { + // Assumptions: (i,j,k) is a tuple on the coarse mesh + // myLID is the corresponding local ID on the fine mesh + const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; + const LO indices[3] = {i, j, k}; + + myLID = 0; + for (int dim = 0; dim < 3; ++dim) { + if ((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2 * dim + 1]) { + // We are dealing with the last node on the mesh in direction dim + // so we can simply use the number of nodes on the fine mesh in that direction + myLID += (this->getLocalFineNodesInDir(dim) - 1) * multiplier[dim]; } else { - myLID += this->lFineNodesPerDir[0] - 1; + myLID += (indices[dim] * this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) * multiplier[dim]; } } +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { + LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); + LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); + LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); + myLID = 0; + if (ktmp * this->coarseRate[2] < this->lFineNodesPerDir[2]) { + myLID += ktmp * this->coarseRate[2] * this->lNumCoarseNodes10; + } else { + myLID += (this->lFineNodesPerDir[2] - 1) * this->lNumCoarseNodes10; + } - template - void GlobalLexicographicIndexManager:: - getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { - LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); - LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); - LO ktmp = k - (this->offsets[2] > 0 ? 
1 : 0); - myLID = ktmp*this->lNumCoarseNodes10 + jtmp*this->lCoarseNodesPerDir[0] + itmp; + if (jtmp * this->coarseRate[1] < this->lFineNodesPerDir[1]) { + myLID += jtmp * this->coarseRate[1] * this->lFineNodesPerDir[0]; + } else { + myLID += (this->lFineNodesPerDir[1] - 1) * this->lFineNodesPerDir[1]; } -} //namespace MueLu + if (itmp * this->coarseRate[0] < this->lFineNodesPerDir[0]) { + myLID += itmp * this->coarseRate[0]; + } else { + myLID += this->lFineNodesPerDir[0] - 1; + } +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { + LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); + LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); + LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); + myLID = ktmp * this->lNumCoarseNodes10 + jtmp * this->lCoarseNodesPerDir[0] + itmp; +} + +} // namespace MueLu #endif /* MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp index a0e809aaba20..c7dab301c795 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp @@ -77,93 +77,90 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class LocalLexicographicIndexManager : public IndexManager { +template +class LocalLexicographicIndexManager : public IndexManager { #undef MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + LocalLexicographicIndexManager() = default; - LocalLexicographicIndexManager() = default; + LocalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, const Array MeshData); - LocalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, - const Array CoarseRate, const Array MeshData); + virtual ~LocalLexicographicIndexManager() {} - virtual ~LocalLexicographicIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + std::vector > getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, 
LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + private: + const int myRank; ///< Local rank ID. + const int numRanks; ///< Number of ranks used to decompose the problem. 
- private: + // Iterator delimiting the entries in meshData that correspond to the block that owns the local + // part of the mesh. + typename std::vector >::iterator myBlockStart, myBlockEnd; - const int myRank; ///< Local rank ID. - const int numRanks; ///< Number of ranks used to decompose the problem. + int pi, pj, pk; ///< Number of processors in each diretcion. - // Iterator delimiting the entries in meshData that correspond to the block that owns the local - // part of the mesh. - typename std::vector >::iterator myBlockStart, myBlockEnd; + int numBlocks; ///< Number of mesh block. + int myBlock; ///< local mesh block ID. - int pi, pj, pk; ///< Number of processors in each diretcion. + int myRankIndex; ///< local process index for record in meshData after sorting. + Array rankIndices; ///< mapping between rank ID and reordered rank ID. + std::vector > meshData; ///< layout of indices accross all processes. + std::vector > coarseMeshData; ///< layout of indices accross all processes after coarsening. - int numBlocks; ///< Number of mesh block. - int myBlock; ///< local mesh block ID. + void sortLocalLexicographicData(); - int myRankIndex; ///< local process index for record in meshData after sorting. - Array rankIndices; ///< mapping between rank ID and reordered rank ID. - std::vector > meshData; ///< layout of indices accross all processes. - std::vector > coarseMeshData; ///< layout of indices accross all processes after coarsening. 
+ void computeCoarseLocalLexicographicData(); - void sortLocalLexicographicData(); + void getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, + const Array coarseNodeFineIndices, GO& myGID, LO& myPID, + LO& myLID) const; +}; - void computeCoarseLocalLexicographicData(); - - void getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, - const Array coarseNodeFineIndices, GO& myGID, LO& myPID, - LO& myLID) const; - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT -#endif // MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DECL_HPP +#endif // MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp index d3c3d8448630..ce997c4f2a80 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp @@ -51,448 +51,418 @@ namespace MueLu { - template - LocalLexicographicIndexManager:: - LocalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, const Array LFineNodesPerDir, - const Array CoarseRate, const Array MeshData) : - IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir), - myRank(MyRank), numRanks(NumRanks) { - - // Allocate data based on user input - meshData.resize(numRanks); - rankIndices.resize(numRanks); - coarseMeshData.resize(numRanks); - - // Load coarse rate, being careful about formating - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = 
CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +LocalLexicographicIndexManager:: + LocalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, const Array LFineNodesPerDir, + const Array CoarseRate, const Array MeshData) + : IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) + , myRank(MyRank) + , numRanks(NumRanks) { + // Allocate data based on user input + meshData.resize(numRanks); + rankIndices.resize(numRanks); + coarseMeshData.resize(numRanks); + + // Load coarse rate, being careful about formating + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - // Load meshData for local lexicographic case - for(int rank = 0; rank < numRanks; ++rank) { - meshData[rank].resize(10); - for(int entry = 0; entry < 10; ++entry) { - meshData[rank][entry] = MeshData[10*rank + entry]; - } - } - - if(this->coupled_) { - myBlock = meshData[myRank][2]; - sortLocalLexicographicData(); - } - - // Start simple parameter calculation - myRankIndex = rankIndices[myRank]; - for(int dim = 0; dim < 3; ++dim) { - this->startIndices[dim] = meshData[myRankIndex][2*dim + 3]; - this->startIndices[dim + 3] = meshData[myRankIndex][2*dim + 4]; + // Load meshData for local lexicographic case + for (int rank = 0; rank < numRanks; ++rank) { + meshData[rank].resize(10); + for (int entry = 0; entry < 10; ++entry) { + meshData[rank][entry] = MeshData[10 * rank + entry]; } + } - this->computeMeshParameters(); - computeGlobalCoarseParameters(); - 
computeCoarseLocalLexicographicData(); - } // Constructor + if (this->coupled_) { + myBlock = meshData[myRank][2]; + sortLocalLexicographicData(); + } - template - void LocalLexicographicIndexManager:: - computeGlobalCoarseParameters() { - this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0]*this->gCoarseNodesPerDir[1]; - this->gNumCoarseNodes = this->gNumCoarseNodes10*this->gCoarseNodesPerDir[2]; + // Start simple parameter calculation + myRankIndex = rankIndices[myRank]; + for (int dim = 0; dim < 3; ++dim) { + this->startIndices[dim] = meshData[myRankIndex][2 * dim + 3]; + this->startIndices[dim + 3] = meshData[myRankIndex][2 * dim + 4]; } - template - void LocalLexicographicIndexManager:: - getGhostedNodesData(const RCP/* fineMap */, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const { - - // First we allocated memory for the outputs - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); - - // Now the tricky part starts, the coarse nodes / ghosted coarse nodes need to be imported. - // This requires finding what their GID on the fine mesh is. They need to be ordered - // lexicographically to allow for fast sweeps through the mesh. 
- - // We loop over all ghosted coarse nodes by increasing global lexicographic order - Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3); - Array lCoarseNodeCoarseIndices(3); - Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes); - LO currentIndex = -1, countCoarseNodes = 0; - for(int k = 0; k < this->ghostedNodesPerDir[2]; ++k) { - for(int j = 0; j < this->ghostedNodesPerDir[1]; ++j) { - for(int i = 0; i < this->ghostedNodesPerDir[0]; ++i) { - currentIndex = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + i; - ghostedCoarseNodeFineIndices[0] = ghostedCoarseNodeCoarseIndices[0]*this->coarseRate[0]; - if(ghostedCoarseNodeFineIndices[0] > this->gFineNodesPerDir[0] - 1) { - ghostedCoarseNodeFineIndices[0] = this->gFineNodesPerDir[0] - 1; - } - ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + j; - ghostedCoarseNodeFineIndices[1] = ghostedCoarseNodeCoarseIndices[1]*this->coarseRate[1]; - if(ghostedCoarseNodeFineIndices[1] > this->gFineNodesPerDir[1] - 1) { - ghostedCoarseNodeFineIndices[1] = this->gFineNodesPerDir[1] - 1; - } - ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + k; - ghostedCoarseNodeFineIndices[2] = ghostedCoarseNodeCoarseIndices[2]*this->coarseRate[2]; - if(ghostedCoarseNodeFineIndices[2] > this->gFineNodesPerDir[2] - 1) { - ghostedCoarseNodeFineIndices[2] = this->gFineNodesPerDir[2] - 1; - } + this->computeMeshParameters(); + computeGlobalCoarseParameters(); + computeCoarseLocalLexicographicData(); +} // Constructor + +template +void LocalLexicographicIndexManager:: + computeGlobalCoarseParameters() { + this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0] * this->gCoarseNodesPerDir[1]; + this->gNumCoarseNodes = this->gNumCoarseNodes10 * this->gCoarseNodesPerDir[2]; +} + +template +void LocalLexicographicIndexManager:: + getGhostedNodesData(const RCP /* fineMap */, + Array& ghostedNodeCoarseLIDs, + 
Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const { + // First we allocated memory for the outputs + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); + + // Now the tricky part starts, the coarse nodes / ghosted coarse nodes need to be imported. + // This requires finding what their GID on the fine mesh is. They need to be ordered + // lexicographically to allow for fast sweeps through the mesh. + + // We loop over all ghosted coarse nodes by increasing global lexicographic order + Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3); + Array lCoarseNodeCoarseIndices(3); + Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes); + LO currentIndex = -1, countCoarseNodes = 0; + for (int k = 0; k < this->ghostedNodesPerDir[2]; ++k) { + for (int j = 0; j < this->ghostedNodesPerDir[1]; ++j) { + for (int i = 0; i < this->ghostedNodesPerDir[0]; ++i) { + currentIndex = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; + ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + i; + ghostedCoarseNodeFineIndices[0] = ghostedCoarseNodeCoarseIndices[0] * this->coarseRate[0]; + if (ghostedCoarseNodeFineIndices[0] > this->gFineNodesPerDir[0] - 1) { + ghostedCoarseNodeFineIndices[0] = this->gFineNodesPerDir[0] - 1; + } + ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + j; + ghostedCoarseNodeFineIndices[1] = ghostedCoarseNodeCoarseIndices[1] * this->coarseRate[1]; + if (ghostedCoarseNodeFineIndices[1] > this->gFineNodesPerDir[1] - 1) { + ghostedCoarseNodeFineIndices[1] = this->gFineNodesPerDir[1] - 1; + } + ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + k; + ghostedCoarseNodeFineIndices[2] = ghostedCoarseNodeCoarseIndices[2] * this->coarseRate[2]; + if (ghostedCoarseNodeFineIndices[2] > this->gFineNodesPerDir[2] - 1) { + 
ghostedCoarseNodeFineIndices[2] = this->gFineNodesPerDir[2] - 1; + } - GO myGID = -1, myCoarseGID = -1; - LO myLID = -1, myPID = -1, myCoarseLID = -1; - getGIDLocalLexicographic(i, j, k, ghostedCoarseNodeFineIndices, myGID, myPID, myLID); + GO myGID = -1, myCoarseGID = -1; + LO myLID = -1, myPID = -1, myCoarseLID = -1; + getGIDLocalLexicographic(i, j, k, ghostedCoarseNodeFineIndices, myGID, myPID, myLID); - int rankIndex = rankIndices[myPID]; - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - lCoarseNodeCoarseIndices[dim] = ghostedCoarseNodeCoarseIndices[dim] - - coarseMeshData[rankIndex][3 + 2*dim]; - } - } - LO myRankIndexCoarseNodesInDir0 = coarseMeshData[rankIndex][4] - - coarseMeshData[rankIndex][3] + 1; - LO myRankIndexCoarseNodes10 = (coarseMeshData[rankIndex][6] - - coarseMeshData[rankIndex][5] + 1) - *myRankIndexCoarseNodesInDir0; - myCoarseLID = lCoarseNodeCoarseIndices[2]*myRankIndexCoarseNodes10 - + lCoarseNodeCoarseIndices[1]*myRankIndexCoarseNodesInDir0 - + lCoarseNodeCoarseIndices[0]; - myCoarseGID = myCoarseLID + coarseMeshData[rankIndex][9]; - - ghostedNodeCoarseLIDs[currentIndex] = myCoarseLID; - ghostedNodeCoarsePIDs[currentIndex] = myPID; - ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; - - if(myPID == myRank) { - lCoarseNodeCoarseGIDs[countCoarseNodes] = myCoarseGID; - ++countCoarseNodes; + int rankIndex = rankIndices[myPID]; + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + lCoarseNodeCoarseIndices[dim] = ghostedCoarseNodeCoarseIndices[dim] - coarseMeshData[rankIndex][3 + 2 * dim]; } } + LO myRankIndexCoarseNodesInDir0 = coarseMeshData[rankIndex][4] - coarseMeshData[rankIndex][3] + 1; + LO myRankIndexCoarseNodes10 = (coarseMeshData[rankIndex][6] - coarseMeshData[rankIndex][5] + 1) * myRankIndexCoarseNodesInDir0; + myCoarseLID = lCoarseNodeCoarseIndices[2] * myRankIndexCoarseNodes10 + lCoarseNodeCoarseIndices[1] * myRankIndexCoarseNodesInDir0 + lCoarseNodeCoarseIndices[0]; + myCoarseGID = 
myCoarseLID + coarseMeshData[rankIndex][9]; + + ghostedNodeCoarseLIDs[currentIndex] = myCoarseLID; + ghostedNodeCoarsePIDs[currentIndex] = myPID; + ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; + + if (myPID == myRank) { + lCoarseNodeCoarseGIDs[countCoarseNodes] = myCoarseGID; + ++countCoarseNodes; + } } } } - - template - void LocalLexicographicIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient storage space for outputs - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - Array coarseStartIndices(3); - for(int dim = 0; dim < 3; ++dim) { - coarseStartIndices[dim] = this->coarseMeshData[myRankIndex][2*dim + 3]; - } - - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - Array coarseIndices(3), fineIndices(3), gCoarseIndices(3); - this->getCoarseNodeLocalTuple(coarseLID, - coarseIndices[0], - coarseIndices[1], - coarseIndices[2]); - getCoarseNodeFineLID(coarseIndices[0],coarseIndices[1],coarseIndices[2],fineLID); - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - - LO myRankIndexCoarseNodesInDir0 = coarseMeshData[myRankIndex][4] - - coarseMeshData[myRankIndex][3] + 1; - LO myRankIndexCoarseNodes10 = (coarseMeshData[myRankIndex][6] - - coarseMeshData[myRankIndex][5] + 1) - *myRankIndexCoarseNodesInDir0; - LO myCoarseLID = coarseIndices[2]*myRankIndexCoarseNodes10 - + coarseIndices[1]*myRankIndexCoarseNodesInDir0 - + coarseIndices[0]; - GO myCoarseGID = myCoarseLID + coarseMeshData[myRankIndex][9]; - coarseNodeCoarseGIDs[coarseLID] = myCoarseGID; - } - +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodesData(const RCP 
fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const { + // Allocate sufficient storage space for outputs + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + Array coarseStartIndices(3); + for (int dim = 0; dim < 3; ++dim) { + coarseStartIndices[dim] = this->coarseMeshData[myRankIndex][2 * dim + 3]; } - template - void LocalLexicographicIndexManager:: - getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, - const Array coarseNodeFineIndices, - GO& myGID, LO& myPID, LO& myLID) const { - - LO ni = -1, nj = -1, li = -1, lj = -1, lk = -1; - LO myRankGuess = myRankIndex; - // We try to make a logical guess as to which PID owns the current coarse node - if(iGhosted == 0 && this->ghostInterface[0]) { - --myRankGuess; - } else if((iGhosted == this->ghostedNodesPerDir[0] - 1) && this->ghostInterface[1]) { - ++myRankGuess; - } - if(jGhosted == 0 && this->ghostInterface[2]) { - myRankGuess -= pi; - } else if((jGhosted == this->ghostedNodesPerDir[1] - 1) && this->ghostInterface[3]) { - myRankGuess += pi; - } - if(kGhosted == 0 && this->ghostInterface[4]) { - myRankGuess -= pj*pi; - } else if((kGhosted == this->ghostedNodesPerDir[2] - 1) && this->ghostInterface[5]) { - myRankGuess += pj*pi; - } - if(coarseNodeFineIndices[0] >= meshData[myRankGuess][3] - && coarseNodeFineIndices[0] <= meshData[myRankGuess][4] - && coarseNodeFineIndices[1] >= meshData[myRankGuess][5] - && coarseNodeFineIndices[1] <= meshData[myRankGuess][6] - && coarseNodeFineIndices[2] >= meshData[myRankGuess][7] - && coarseNodeFineIndices[2] <= meshData[myRankGuess][8] - && myRankGuess < numRanks - 1) { - myPID = meshData[myRankGuess][0]; - ni = meshData[myRankGuess][4] - meshData[myRankGuess][3] + 1; - nj = meshData[myRankGuess][6] - meshData[myRankGuess][5] + 1; - 
li = coarseNodeFineIndices[0] - meshData[myRankGuess][3]; - lj = coarseNodeFineIndices[1] - meshData[myRankGuess][5]; - lk = coarseNodeFineIndices[2] - meshData[myRankGuess][7]; - myLID = lk*nj*ni + lj*ni + li; - myGID = meshData[myRankGuess][9] + myLID; - } else { // The guess failed, let us use the heavy artilery: std::find_if() - // It could be interesting to monitor how many times this branch of the code gets - // used as it is far more expensive than the above one... - auto nodeRank = std::find_if(myBlockStart, myBlockEnd, - [coarseNodeFineIndices](const std::vector& vec){ - if(coarseNodeFineIndices[0] >= vec[3] - && coarseNodeFineIndices[0] <= vec[4] - && coarseNodeFineIndices[1] >= vec[5] - && coarseNodeFineIndices[1] <= vec[6] - && coarseNodeFineIndices[2] >= vec[7] - && coarseNodeFineIndices[2] <= vec[8]) { - return true; - } else { - return false; - } - }); - myPID = (*nodeRank)[0]; - ni = (*nodeRank)[4] - (*nodeRank)[3] + 1; - nj = (*nodeRank)[6] - (*nodeRank)[5] + 1; - li = coarseNodeFineIndices[0] - (*nodeRank)[3]; - lj = coarseNodeFineIndices[1] - (*nodeRank)[5]; - lk = coarseNodeFineIndices[2] - (*nodeRank)[7]; - myLID = lk*nj*ni + lj*ni + li; - myGID = (*nodeRank)[9] + myLID; - } + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { + Array coarseIndices(3), fineIndices(3), gCoarseIndices(3); + this->getCoarseNodeLocalTuple(coarseLID, + coarseIndices[0], + coarseIndices[1], + coarseIndices[2]); + getCoarseNodeFineLID(coarseIndices[0], coarseIndices[1], coarseIndices[2], fineLID); + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; + + LO myRankIndexCoarseNodesInDir0 = coarseMeshData[myRankIndex][4] - coarseMeshData[myRankIndex][3] + 1; + LO myRankIndexCoarseNodes10 = (coarseMeshData[myRankIndex][6] - coarseMeshData[myRankIndex][5] + 1) * myRankIndexCoarseNodesInDir0; + LO myCoarseLID = coarseIndices[2] * 
myRankIndexCoarseNodes10 + coarseIndices[1] * myRankIndexCoarseNodesInDir0 + coarseIndices[0]; + GO myCoarseGID = myCoarseLID + coarseMeshData[myRankIndex][9]; + coarseNodeCoarseGIDs[coarseLID] = myCoarseGID; } - - template - void LocalLexicographicIndexManager:: - sortLocalLexicographicData() { - - std::sort(meshData.begin(), meshData.end(), - [](const std::vector& a, const std::vector& b)->bool { - // The below function sorts ranks by blockID, kmin, jmin and imin - if(a[2] < b[2]) { +} + +template +void LocalLexicographicIndexManager:: + getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, + const Array coarseNodeFineIndices, + GO& myGID, LO& myPID, LO& myLID) const { + LO ni = -1, nj = -1, li = -1, lj = -1, lk = -1; + LO myRankGuess = myRankIndex; + // We try to make a logical guess as to which PID owns the current coarse node + if (iGhosted == 0 && this->ghostInterface[0]) { + --myRankGuess; + } else if ((iGhosted == this->ghostedNodesPerDir[0] - 1) && this->ghostInterface[1]) { + ++myRankGuess; + } + if (jGhosted == 0 && this->ghostInterface[2]) { + myRankGuess -= pi; + } else if ((jGhosted == this->ghostedNodesPerDir[1] - 1) && this->ghostInterface[3]) { + myRankGuess += pi; + } + if (kGhosted == 0 && this->ghostInterface[4]) { + myRankGuess -= pj * pi; + } else if ((kGhosted == this->ghostedNodesPerDir[2] - 1) && this->ghostInterface[5]) { + myRankGuess += pj * pi; + } + if (coarseNodeFineIndices[0] >= meshData[myRankGuess][3] && coarseNodeFineIndices[0] <= meshData[myRankGuess][4] && coarseNodeFineIndices[1] >= meshData[myRankGuess][5] && coarseNodeFineIndices[1] <= meshData[myRankGuess][6] && coarseNodeFineIndices[2] >= meshData[myRankGuess][7] && coarseNodeFineIndices[2] <= meshData[myRankGuess][8] && myRankGuess < numRanks - 1) { + myPID = meshData[myRankGuess][0]; + ni = meshData[myRankGuess][4] - meshData[myRankGuess][3] + 1; + nj = meshData[myRankGuess][6] - meshData[myRankGuess][5] + 1; + li = coarseNodeFineIndices[0] - 
meshData[myRankGuess][3]; + lj = coarseNodeFineIndices[1] - meshData[myRankGuess][5]; + lk = coarseNodeFineIndices[2] - meshData[myRankGuess][7]; + myLID = lk * nj * ni + lj * ni + li; + myGID = meshData[myRankGuess][9] + myLID; + } else { // The guess failed, let us use the heavy artilery: std::find_if() + // It could be interesting to monitor how many times this branch of the code gets + // used as it is far more expensive than the above one... + auto nodeRank = std::find_if(myBlockStart, myBlockEnd, + [coarseNodeFineIndices](const std::vector& vec) { + if (coarseNodeFineIndices[0] >= vec[3] && coarseNodeFineIndices[0] <= vec[4] && coarseNodeFineIndices[1] >= vec[5] && coarseNodeFineIndices[1] <= vec[6] && coarseNodeFineIndices[2] >= vec[7] && coarseNodeFineIndices[2] <= vec[8]) { + return true; + } else { + return false; + } + }); + myPID = (*nodeRank)[0]; + ni = (*nodeRank)[4] - (*nodeRank)[3] + 1; + nj = (*nodeRank)[6] - (*nodeRank)[5] + 1; + li = coarseNodeFineIndices[0] - (*nodeRank)[3]; + lj = coarseNodeFineIndices[1] - (*nodeRank)[5]; + lk = coarseNodeFineIndices[2] - (*nodeRank)[7]; + myLID = lk * nj * ni + lj * ni + li; + myGID = (*nodeRank)[9] + myLID; + } +} + +template +void LocalLexicographicIndexManager:: + sortLocalLexicographicData() { + std::sort(meshData.begin(), meshData.end(), + [](const std::vector& a, const std::vector& b) -> bool { + // The below function sorts ranks by blockID, kmin, jmin and imin + if (a[2] < b[2]) { + return true; + } else if (a[2] == b[2]) { + if (a[7] < b[7]) { return true; - } else if(a[2] == b[2]) { - if(a[7] < b[7]) { + } else if (a[7] == b[7]) { + if (a[5] < b[5]) { return true; - } else if(a[7] == b[7]) { - if(a[5] < b[5]) { + } else if (a[5] == b[5]) { + if (a[3] < b[3]) { return true; - } else if(a[5] == b[5]) { - if(a[3] < b[3]) {return true;} } } } - return false; - }); - - numBlocks = meshData[numRanks - 1][2] + 1; - // Find the range of the current block - myBlockStart = std::lower_bound(meshData.begin(), 
meshData.end(), myBlock - 1, - [] (const std::vector& vec, const GO val)->bool { - return (vec[2] < val) ? true : false; - }); - myBlockEnd = std::upper_bound(meshData.begin(), meshData.end(), myBlock, - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[2]) ? true : false; + } + return false; + }); + + numBlocks = meshData[numRanks - 1][2] + 1; + // Find the range of the current block + myBlockStart = std::lower_bound(meshData.begin(), meshData.end(), myBlock - 1, + [](const std::vector& vec, const GO val) -> bool { + return (vec[2] < val) ? true : false; }); - // Assuming that i,j,k and ranges are split in pi, pj and pk processors - // we search for these numbers as they will allow us to find quickly the PID of processors - // owning ghost nodes. - auto myKEnd = std::upper_bound(myBlockStart, myBlockEnd, (*myBlockStart)[3], - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[7]) ? true : false; - }); - auto myJEnd = std::upper_bound(myBlockStart, myKEnd, (*myBlockStart)[3], - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[5]) ? true : false; - }); - pi = std::distance(myBlockStart, myJEnd); - pj = std::distance(myBlockStart, myKEnd) / pi; - pk = std::distance(myBlockStart, myBlockEnd) / (pj*pi); - - // We also look for the index of the local rank in the current block. - const int MyRank = myRank; - myRankIndex = std::distance(meshData.begin(), - std::find_if(myBlockStart, myBlockEnd, - [MyRank] (const std::vector& vec)->bool { - return (vec[0] == MyRank) ? true : false; - }) - ); - // We also construct a mapping of rank to rankIndex in the meshData vector, - // this will allow us to access data quickly later on. - for(int rankIndex = 0; rankIndex < numRanks; ++rankIndex) { - rankIndices[meshData[rankIndex][0]] = rankIndex; - } + myBlockEnd = std::upper_bound(meshData.begin(), meshData.end(), myBlock, + [](const GO val, const std::vector& vec) -> bool { + return (val < vec[2]) ? 
true : false; + }); + // Assuming that i,j,k and ranges are split in pi, pj and pk processors + // we search for these numbers as they will allow us to find quickly the PID of processors + // owning ghost nodes. + auto myKEnd = std::upper_bound(myBlockStart, myBlockEnd, (*myBlockStart)[3], + [](const GO val, const std::vector& vec) -> bool { + return (val < vec[7]) ? true : false; + }); + auto myJEnd = std::upper_bound(myBlockStart, myKEnd, (*myBlockStart)[3], + [](const GO val, const std::vector& vec) -> bool { + return (val < vec[5]) ? true : false; + }); + pi = std::distance(myBlockStart, myJEnd); + pj = std::distance(myBlockStart, myKEnd) / pi; + pk = std::distance(myBlockStart, myBlockEnd) / (pj * pi); + + // We also look for the index of the local rank in the current block. + const int MyRank = myRank; + myRankIndex = std::distance(meshData.begin(), + std::find_if(myBlockStart, myBlockEnd, + [MyRank](const std::vector& vec) -> bool { + return (vec[0] == MyRank) ? true : false; + })); + // We also construct a mapping of rank to rankIndex in the meshData vector, + // this will allow us to access data quickly later on. 
+ for (int rankIndex = 0; rankIndex < numRanks; ++rankIndex) { + rankIndices[meshData[rankIndex][0]] = rankIndex; } - - template - void LocalLexicographicIndexManager:: - computeCoarseLocalLexicographicData() { - Array rankOffset(3); - for(int rank = 0; rank < numRanks; ++rank) { - coarseMeshData[rank].resize(10); - coarseMeshData[rank][0] = meshData[rank][0]; - coarseMeshData[rank][1] = meshData[rank][1]; - coarseMeshData[rank][2] = meshData[rank][2]; - for(int dim = 0; dim < 3; ++dim) { - coarseMeshData[rank][3 + 2*dim] = meshData[rank][3 + 2*dim] / this->coarseRate[dim]; - if(meshData[rank][3 + 2*dim] % this->coarseRate[dim] > 0) { - ++coarseMeshData[rank][3 + 2*dim]; - } - coarseMeshData[rank][3 + 2*dim + 1] = meshData[rank][3 + 2*dim + 1] / this->coarseRate[dim]; - if(meshData[rank][3 + 2*dim + 1] == this->gFineNodesPerDir[dim] - 1 && - meshData[rank][3 + 2*dim + 1] % this->coarseRate[dim] > 0) { - //this->endRate[dim] < this->coarseRate[dim]) { - ++coarseMeshData[rank][3 + 2*dim + 1]; - } +} + +template +void LocalLexicographicIndexManager:: + computeCoarseLocalLexicographicData() { + Array rankOffset(3); + for (int rank = 0; rank < numRanks; ++rank) { + coarseMeshData[rank].resize(10); + coarseMeshData[rank][0] = meshData[rank][0]; + coarseMeshData[rank][1] = meshData[rank][1]; + coarseMeshData[rank][2] = meshData[rank][2]; + for (int dim = 0; dim < 3; ++dim) { + coarseMeshData[rank][3 + 2 * dim] = meshData[rank][3 + 2 * dim] / this->coarseRate[dim]; + if (meshData[rank][3 + 2 * dim] % this->coarseRate[dim] > 0) { + ++coarseMeshData[rank][3 + 2 * dim]; } - if(rank > 0) { - coarseMeshData[rank][9] = coarseMeshData[rank - 1][9] - + (coarseMeshData[rank - 1][8] - coarseMeshData[rank - 1][7] + 1) - * (coarseMeshData[rank - 1][6] - coarseMeshData[rank - 1][5] + 1) - * (coarseMeshData[rank - 1][4] - coarseMeshData[rank - 1][3] + 1); + coarseMeshData[rank][3 + 2 * dim + 1] = meshData[rank][3 + 2 * dim + 1] / this->coarseRate[dim]; + if (meshData[rank][3 + 2 * dim + 
1] == this->gFineNodesPerDir[dim] - 1 && + meshData[rank][3 + 2 * dim + 1] % this->coarseRate[dim] > 0) { + // this->endRate[dim] < this->coarseRate[dim]) { + ++coarseMeshData[rank][3 + 2 * dim + 1]; } } + if (rank > 0) { + coarseMeshData[rank][9] = coarseMeshData[rank - 1][9] + (coarseMeshData[rank - 1][8] - coarseMeshData[rank - 1][7] + 1) * (coarseMeshData[rank - 1][6] - coarseMeshData[rank - 1][5] + 1) * (coarseMeshData[rank - 1][4] - coarseMeshData[rank - 1][3] + 1); + } } - - template - std::vector > LocalLexicographicIndexManager:: - getCoarseMeshData() const {return coarseMeshData;} - - template - void LocalLexicographicIndexManager:: - getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void LocalLexicographicIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - } - - template - void LocalLexicographicIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } - - template - void LocalLexicographicIndexManager:: - getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void LocalLexicographicIndexManager:: - getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO 
tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - // Assumptions: (i,j,k) is a tuple on the coarse mesh - // myLID is the corresponding local ID on the fine mesh - const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; - const LO indices[3] = {i, j, k}; - - myLID = 0; - for(int dim = 0; dim < 3; ++dim) { - if((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2*dim + 1]) { - // We are dealing with the last node on the mesh in direction dim - // so we can simply use the number of nodes on the fine mesh in that direction - myLID += (this->getLocalFineNodesInDir(dim) - 1)*multiplier[dim]; - } else { - myLID += (indices[dim]*this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) - *multiplier[dim]; - } +} + +template +std::vector > LocalLexicographicIndexManager:: + getCoarseMeshData() const { return coarseMeshData; } + +template +void LocalLexicographicIndexManager:: + getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template +void LocalLexicographicIndexManager:: + getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % 
this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} + +template +void LocalLexicographicIndexManager:: + getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} + +template +void LocalLexicographicIndexManager:: + getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void LocalLexicographicIndexManager:: + getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { + // Assumptions: (i,j,k) is a tuple on the coarse mesh + // myLID is the corresponding local ID on the fine mesh + const LO 
multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; + const LO indices[3] = {i, j, k}; + + myLID = 0; + for (int dim = 0; dim < 3; ++dim) { + if ((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2 * dim + 1]) { + // We are dealing with the last node on the mesh in direction dim + // so we can simply use the number of nodes on the fine mesh in that direction + myLID += (this->getLocalFineNodesInDir(dim) - 1) * multiplier[dim]; + } else { + myLID += (indices[dim] * this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) * multiplier[dim]; } } +} - template - void LocalLexicographicIndexManager:: - getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void LocalLexicographicIndexManager:: + getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} - template - void LocalLexicographicIndexManager:: - getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void LocalLexicographicIndexManager:: + getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp index 4a8d987b423b..fbafcf783928 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp @@ -77,76 +77,73 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class UncoupledIndexManager : public IndexManager { +template +class UncoupledIndexManager : public IndexManager { #undef MUELU_UNCOUPLEDINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + // LBV: I doubt that it makes sense to have + // this particular constructor since it is + // not used anywhere and parameters cannot + // all accessible after construction. + UncoupledIndexManager() = default; - //LBV: I doubt that it makes sense to have - // this particular constructor since it is - // not used anywhere and parameters cannot - // all accessible after construction. - UncoupledIndexManager() = default; + UncoupledIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const bool singleCoarsePoint); - UncoupledIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, - const Array CoarseRate, - const bool singleCoarsePoint); + virtual ~UncoupledIndexManager() {} - virtual ~UncoupledIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + std::vector > getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& 
coarseNodeFineGIDs) const; + void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; - void 
getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + private: + const int myRank; ///< Local rank ID. + const int numRanks; ///< Number of ranks used to decompose the problem. +}; - private: - - const int myRank; ///< Local rank ID. - const int numRanks; ///< Number of ranks used to decompose the problem. - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_UNCOUPLEDINDEXMANAGER_SHORT -#endif // MUELU_UNCOUPLEDINDEXMANAGER_DECL_HPP +#endif // MUELU_UNCOUPLEDINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp index efd12ebd68c9..a297eb60ecf8 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp @@ -52,194 +52,188 @@ namespace MueLu { - template - UncoupledIndexManager:: - UncoupledIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, const Array LFineNodesPerDir, - const Array CoarseRate, const bool singleCoarsePoint) : - IndexManager(comm, coupled, singleCoarsePoint, NumDimensions, interpolationOrder, - Array(3, -1), LFineNodesPerDir), - myRank(MyRank), numRanks(NumRanks) - { - - // Load coarse rate, being careful about formating - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +UncoupledIndexManager:: + UncoupledIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const 
int NumRanks, + const Array GFineNodesPerDir, const Array LFineNodesPerDir, + const Array CoarseRate, const bool singleCoarsePoint) + : IndexManager(comm, coupled, singleCoarsePoint, NumDimensions, interpolationOrder, + Array(3, -1), LFineNodesPerDir) + , myRank(MyRank) + , numRanks(NumRanks) { + // Load coarse rate, being careful about formating + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - this->computeMeshParameters(); - this->gNumCoarseNodes10 = Teuchos::OrdinalTraits::invalid(); - this->gNumCoarseNodes = Teuchos::OrdinalTraits::invalid(); - } // Constructor - - template - void UncoupledIndexManager:: - computeGlobalCoarseParameters() { - GO input[1] = {as(this->lNumCoarseNodes)}, output[1] = {0}; - Teuchos::reduceAll(*(this->comm_), Teuchos::REDUCE_SUM, 1, input, output); - this->gNumCoarseNodes = output[0]; - } // computeGlobalCoarseParameters - - template - void UncoupledIndexManager:: - getGhostedNodesData(const RCP/* fineMap */, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& /* ghostedNodeCoarseGIDs */) const { - - // First we allocate memory for the outputs - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - // In the uncoupled case the data required is trivial to provide! 
- for(LO idx = 0; idx < this->getNumLocalGhostedNodes(); ++idx) { - ghostedNodeCoarseLIDs[idx] = idx; - ghostedNodeCoarsePIDs[idx] = myRank; - } - } // getGhostedNodesData - - template - void UncoupledIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient amount of storage in output arrays - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - Array coarseIndices(3), fineIndices(3); - this->getCoarseNodeLocalTuple(coarseLID, - coarseIndices[0], - coarseIndices[1], - coarseIndices[2]); - for(int dim = 0; dim < 3; ++dim) { - if(coarseIndices[dim] == this->lCoarseNodesPerDir[dim] - 1) { - if(this->lCoarseNodesPerDir[dim] == 1) { - fineIndices[dim] = 0; - } else { - fineIndices[dim] = this->lFineNodesPerDir[dim] - 1; - } + this->computeMeshParameters(); + this->gNumCoarseNodes10 = Teuchos::OrdinalTraits::invalid(); + this->gNumCoarseNodes = Teuchos::OrdinalTraits::invalid(); +} // Constructor + +template +void UncoupledIndexManager:: + computeGlobalCoarseParameters() { + GO input[1] = {as(this->lNumCoarseNodes)}, output[1] = {0}; + Teuchos::reduceAll(*(this->comm_), Teuchos::REDUCE_SUM, 1, input, output); + this->gNumCoarseNodes = output[0]; +} // computeGlobalCoarseParameters + +template +void UncoupledIndexManager:: + getGhostedNodesData(const RCP /* fineMap */, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& /* ghostedNodeCoarseGIDs */) const { + // First we allocate memory for the outputs + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + 
ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + // In the uncoupled case the data required is trivial to provide! + for (LO idx = 0; idx < this->getNumLocalGhostedNodes(); ++idx) { + ghostedNodeCoarseLIDs[idx] = idx; + ghostedNodeCoarsePIDs[idx] = myRank; + } +} // getGhostedNodesData + +template +void UncoupledIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const { + // Allocate sufficient amount of storage in output arrays + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { + Array coarseIndices(3), fineIndices(3); + this->getCoarseNodeLocalTuple(coarseLID, + coarseIndices[0], + coarseIndices[1], + coarseIndices[2]); + for (int dim = 0; dim < 3; ++dim) { + if (coarseIndices[dim] == this->lCoarseNodesPerDir[dim] - 1) { + if (this->lCoarseNodesPerDir[dim] == 1) { + fineIndices[dim] = 0; } else { - fineIndices[dim] = coarseIndices[dim]*this->coarseRate[dim]; + fineIndices[dim] = this->lFineNodesPerDir[dim] - 1; } + } else { + fineIndices[dim] = coarseIndices[dim] * this->coarseRate[dim]; } - - fineLID = fineIndices[2]*this->lNumFineNodes10 - + fineIndices[1]*this->lFineNodesPerDir[0] - + fineIndices[0]; - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - } - } // getCoarseNodesData - - template - std::vector > UncoupledIndexManager:: - getCoarseMeshData() const { - std::vector > coarseMeshData; - return coarseMeshData; - } - - template - void UncoupledIndexManager:: - getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void UncoupledIndexManager:: - 
getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - } // getFineNodeLocalTuple - - template - void UncoupledIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } // getFineNodeGhostedTuple - - template - void UncoupledIndexManager:: - getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - template - void UncoupledIndexManager:: - getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { + fineLID = fineIndices[2] * this->lNumFineNodes10 + fineIndices[1] * this->lFineNodesPerDir[0] + fineIndices[0]; + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; } - - template - void UncoupledIndexManager:: - getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void UncoupledIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } // getCoarseNodeLocalTuple - - template - void UncoupledIndexManager:: - getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void UncoupledIndexManager:: - getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void UncoupledIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = 
k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } // getCoarseNodeGhostedLID - - template - void UncoupledIndexManager:: - getCoarseNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void UncoupledIndexManager:: - getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void UncoupledIndexManager:: - getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - -} //namespace MueLu +} // getCoarseNodesData + +template +std::vector > UncoupledIndexManager:: + getCoarseMeshData() const { + std::vector > coarseMeshData; + return coarseMeshData; +} + +template +void UncoupledIndexManager:: + getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template +void UncoupledIndexManager:: + getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} // getFineNodeLocalTuple + +template +void UncoupledIndexManager:: + getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} // getFineNodeGhostedTuple + +template +void UncoupledIndexManager:: + getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void UncoupledIndexManager:: + getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template 
+void UncoupledIndexManager:: + getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} // getCoarseNodeLocalTuple + +template +void UncoupledIndexManager:: + getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void UncoupledIndexManager:: + getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} // getCoarseNodeGhostedLID + +template +void UncoupledIndexManager:: + getCoarseNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +} // namespace MueLu #endif /* MUELU_UNCOUPLEDINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp index aacc182dc4b3..da0151d090ea 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp @@ -55,75 +55,71 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationPhase1Algorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. 
- - @ingroup Aggregation - - ### Idea ### - Phase 1 tries to build new aggregates which fulfill the user chosen aggregation - criteria (i.e. minimum and maximum size of aggregates). Especially the chosen - ordering for the input nodes may have some influence on the final aggregates. - Phase 1 is the most important aggregation routine for building new aggregates. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: ordering | Ordering of graph nodes in which the nodes are processed for aggregation. The options are natural, random and graph. - aggregation: max selected neighbors | Maximum number of neighbor nodes which have already been added to aggregates. - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. - */ - - template - class AggregationPhase1Algorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase1Algorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + + @ingroup Aggregation + + ### Idea ### + Phase 1 tries to build new aggregates which fulfill the user chosen aggregation + criteria (i.e. minimum and maximum size of aggregates). Especially the chosen + ordering for the input nodes may have some influence on the final aggregates. + Phase 1 is the most important aggregation routine for building new aggregates. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: ordering | Ordering of graph nodes in which the nodes are processed for aggregation. The options are natural, random and graph. + aggregation: max selected neighbors | Maximum number of neighbor nodes which have already been added to aggregates. + aggregation: min agg size | minimum number of nodes which have to be in an aggregate. 
+ aggregation: max agg size | maximum allowed number of nodes in an aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. +*/ + +template +class AggregationPhase1Algorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AggregationPhase1Algorithm(const RCP& /* graphFact */ = Teuchos::null) { } + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~AggregationPhase1Algorithm() { } + //! Constructor. + AggregationPhase1Algorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //@} + //! Destructor. + virtual ~AggregationPhase1Algorithm() {} + //@} - //! @name Aggregation methods. - //@{ + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. */ - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - std::string description() const { return "Phase 1 (main)"; } + std::string description() const { return "Phase 1 (main)"; } - private: - - /*! @brief Utility to take a list of integers and reorder them randomly (by using a local permutation). - @param list On input, a bunch of integers. On output, the same integers in a different order - that is determined randomly. - */ - void RandomReorder(ArrayRCP list) const; - - /*! @brief Generate a random number in the range [min, max] */ - int RandomOrdinal(int min, int max) const; + private: + /*! @brief Utility to take a list of integers and reorder them randomly (by using a local permutation). + @param list On input, a bunch of integers. 
On output, the same integers in a different order + that is determined randomly. + */ + void RandomReorder(ArrayRCP list) const; - }; + /*! @brief Generate a random number in the range [min, max] */ + int RandomOrdinal(int min, int max) const; +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp index 958e5b8d0930..6367fe104516 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp @@ -62,189 +62,188 @@ namespace MueLu { - template - void AggregationPhase1Algorithm:: - BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - std::string orderingStr = params.get("aggregation: ordering"); - int maxNeighAlreadySelected = params.get ("aggregation: max selected neighbors"); - int minNodesPerAggregate = params.get ("aggregation: min agg size"); - int maxNodesPerAggregate = params.get ("aggregation: max agg size"); - bool matchMLBehavior = params.get("aggregation: match ML phase1"); - - TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, - "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - - enum { - O_NATURAL, - O_RANDOM, - O_GRAPH - } ordering; - ordering = O_NATURAL; // initialize variable (fix CID 143665) - if (orderingStr == "natural") ordering = O_NATURAL; - if (orderingStr == "random" ) ordering = O_RANDOM; - if (orderingStr == "graph" ) ordering = O_GRAPH; - - const LO numRows = 
graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - ArrayRCP randomVector; - if (ordering == O_RANDOM) { - randomVector = arcp(numRows); - for (LO i = 0; i < numRows; i++) - randomVector[i] = i; - RandomReorder(randomVector); - } +template +void AggregationPhase1Algorithm:: + BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + std::string orderingStr = params.get("aggregation: ordering"); + int maxNeighAlreadySelected = params.get("aggregation: max selected neighbors"); + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLBehavior = params.get("aggregation: match ML phase1"); + + TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, + "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); + + enum { + O_NATURAL, + O_RANDOM, + O_GRAPH + } ordering; + ordering = O_NATURAL; // initialize variable (fix CID 143665) + if (orderingStr == "natural") ordering = O_NATURAL; + if (orderingStr == "random") ordering = O_RANDOM; + if (orderingStr == "graph") ordering = O_GRAPH; + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + ArrayRCP randomVector; + if (ordering == O_RANDOM) { + randomVector = arcp(numRows); + for (LO i = 0; i < 
numRows; i++) + randomVector[i] = i; + RandomReorder(randomVector); + } - int aggIndex = -1; - size_t aggSize = 0; - std::vector aggList(graph.getLocalMaxNumRowEntries()); - - std::queue graphOrderQueue; - - // Main loop over all local rows of graph(A) - for (LO i = 0; i < numRows; i++) { - // Step 1: pick the next node to aggregate - LO rootCandidate = 0; - if (ordering == O_NATURAL) rootCandidate = i; - else if (ordering == O_RANDOM) rootCandidate = randomVector[i]; - else if (ordering == O_GRAPH) { - - if (graphOrderQueue.size() == 0) { - // Current queue is empty for "graph" ordering, populate with one READY node - for (LO jnode = 0; jnode < numRows; jnode++) - if (aggStat[jnode] == READY) { - graphOrderQueue.push(jnode); - break; - } - } - if (graphOrderQueue.size() == 0) { - // There are no more ready nodes, end the phase - break; - } - rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue - graphOrderQueue.pop(); // delete this node in list + int aggIndex = -1; + size_t aggSize = 0; + std::vector aggList(graph.getLocalMaxNumRowEntries()); + + std::queue graphOrderQueue; + + // Main loop over all local rows of graph(A) + for (LO i = 0; i < numRows; i++) { + // Step 1: pick the next node to aggregate + LO rootCandidate = 0; + if (ordering == O_NATURAL) + rootCandidate = i; + else if (ordering == O_RANDOM) + rootCandidate = randomVector[i]; + else if (ordering == O_GRAPH) { + if (graphOrderQueue.size() == 0) { + // Current queue is empty for "graph" ordering, populate with one READY node + for (LO jnode = 0; jnode < numRows; jnode++) + if (aggStat[jnode] == READY) { + graphOrderQueue.push(jnode); + break; + } + } + if (graphOrderQueue.size() == 0) { + // There are no more ready nodes, end the phase + break; } + rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue + graphOrderQueue.pop(); // delete this node in list + } - if (aggStat[rootCandidate] != READY) - continue; + if (aggStat[rootCandidate] 
!= READY) + continue; - // Step 2: build tentative aggregate - aggSize = 0; - aggList[aggSize++] = rootCandidate; + // Step 2: build tentative aggregate + aggSize = 0; + aggList[aggSize++] = rootCandidate; - ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); + ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); - // If the number of neighbors is less than the minimum number of nodes - // per aggregate, we know this is not going to be a valid root, and we - // may skip it, but only for "natural" and "random" (for "graph" we still - // need to fetch the list of local neighbors to continue) - if ((ordering == O_NATURAL || ordering == O_RANDOM) && - neighOfINode.size() < minNodesPerAggregate) { - continue; - } + // If the number of neighbors is less than the minimum number of nodes + // per aggregate, we know this is not going to be a valid root, and we + // may skip it, but only for "natural" and "random" (for "graph" we still + // need to fetch the list of local neighbors to continue) + if ((ordering == O_NATURAL || ordering == O_RANDOM) && + neighOfINode.size() < minNodesPerAggregate) { + continue; + } - LO numAggregatedNeighbours = 0; - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) { - - if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) { - // If aggregate size does not exceed max size, add node to the - // tentative aggregate - // NOTE: We do not exit the loop over all neighbours since we have - // still to count all aggregated neighbour nodes for the - // aggregation criteria - // NOTE: We check here for the maximum aggregation size. If we - // would do it below with all the other check too big aggregates - // would not be accepted at all. 
- if (aggSize < as(maxNodesPerAggregate)) - aggList[aggSize++] = neigh; - - } else if(!matchMLBehavior || aggStat[neigh] != IGNORED) { - // NOTE: ML checks against BOUNDARY here, but boundary nodes are flagged as IGNORED by - // the time we get to Phase 1, so we check IGNORED instead - numAggregatedNeighbours++; - } + LO numAggregatedNeighbours = 0; + + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) { + if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) { + // If aggregate size does not exceed max size, add node to the + // tentative aggregate + // NOTE: We do not exit the loop over all neighbours since we have + // still to count all aggregated neighbour nodes for the + // aggregation criteria + // NOTE: We check here for the maximum aggregation size. If we + // would do it below with all the other check too big aggregates + // would not be accepted at all. + if (aggSize < as(maxNodesPerAggregate)) + aggList[aggSize++] = neigh; + + } else if (!matchMLBehavior || aggStat[neigh] != IGNORED) { + // NOTE: ML checks against BOUNDARY here, but boundary nodes are flagged as IGNORED by + // the time we get to Phase 1, so we check IGNORED instead + numAggregatedNeighbours++; } } + } - // Step 3: check if tentative aggregate is acceptable - if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too many connections to other aggregates - (aggSize >= as(minNodesPerAggregate))) { // too few nodes in the tentative aggregate - // Accept new aggregate - // rootCandidate becomes the root of the newly formed aggregate - aggregates.SetIsRoot(rootCandidate); - aggIndex = numLocalAggregates++; - - for (size_t k = 0; k < aggSize; k++) { - aggStat [aggList[k]] = AGGREGATED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner [aggList[k]] = myRank; - } + // Step 3: check if tentative aggregate is acceptable + if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too 
many connections to other aggregates + (aggSize >= as(minNodesPerAggregate))) { // too few nodes in the tentative aggregate + // Accept new aggregate + // rootCandidate becomes the root of the newly formed aggregate + aggregates.SetIsRoot(rootCandidate); + aggIndex = numLocalAggregates++; + + for (size_t k = 0; k < aggSize; k++) { + aggStat[aggList[k]] = AGGREGATED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; + } - numNonAggregatedNodes -= aggSize; + numNonAggregatedNodes -= aggSize; - } else { - // Aggregate is not accepted - aggStat[rootCandidate] = NOTSEL; + } else { + // Aggregate is not accepted + aggStat[rootCandidate] = NOTSEL; - // Need this for the "graph" ordering below - // The original candidate is always aggList[0] - aggSize = 1; - } + // Need this for the "graph" ordering below + // The original candidate is always aggList[0] + aggSize = 1; + } - if (ordering == O_GRAPH) { - // Add candidates to the list of nodes - // NOTE: the code have slightly different meanings depending on context: - // - if aggregate was accepted, we add neighbors of neighbors of the original candidate - // - if aggregate was not accepted, we add neighbors of the original candidate - for (size_t k = 0; k < aggSize; k++) { - ArrayView neighOfJNode = graph.getNeighborVertices(aggList[k]); + if (ordering == O_GRAPH) { + // Add candidates to the list of nodes + // NOTE: the code have slightly different meanings depending on context: + // - if aggregate was accepted, we add neighbors of neighbors of the original candidate + // - if aggregate was not accepted, we add neighbors of the original candidate + for (size_t k = 0; k < aggSize; k++) { + ArrayView neighOfJNode = graph.getNeighborVertices(aggList[k]); - for (int j = 0; j < neighOfJNode.size(); j++) { - LO neigh = neighOfJNode[j]; + for (int j = 0; j < neighOfJNode.size(); j++) { + LO neigh = neighOfJNode[j]; - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) - 
graphOrderQueue.push(neigh); - } + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) + graphOrderQueue.push(neigh); } } } - - // Reset all NOTSEL vertices to READY - // This simplifies other algorithms - for (LO i = 0; i < numRows; i++) - if (aggStat[i] == NOTSEL) - aggStat[i] = READY; - - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } - - template - void AggregationPhase1Algorithm::RandomReorder(ArrayRCP list) const { - //TODO: replace int - int n = list.size(); - for(int i = 0; i < n-1; i++) - std::swap(list[i], list[RandomOrdinal(i,n-1)]); } - template - int AggregationPhase1Algorithm::RandomOrdinal(int min, int max) const { - return min + as((max-min+1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); - } - -} // end namespace - + // Reset all NOTSEL vertices to READY + // This simplifies other algorithms + for (LO i = 0; i < numRows; i++) + if (aggStat[i] == NOTSEL) + aggStat[i] = READY; + + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} + +template +void AggregationPhase1Algorithm::RandomReorder(ArrayRCP list) const { + // TODO: replace int + int n = list.size(); + for (int i = 0; i < n - 1; i++) + std::swap(list[i], list[RandomOrdinal(i, n - 1)]); +} + +template +int AggregationPhase1Algorithm::RandomOrdinal(int min, int max) const { + return min + as((max - min + 1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); +} + +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp index 295f9d927e43..dd7e9579b8b6 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp @@ -58,84 +58,81 @@ #include 
"MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class AggregationPhase1Algorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - - @ingroup Aggregation - - ### Idea ### - Phase 1 tries to build new aggregates which fulfill the user chosen aggregation - criteria (i.e. minimum and maximum size of aggregates). Especially the chosen - ordering for the input nodes may have some influence on the final aggregates. - Phase 1 is the most important aggregation routine for building new aggregates. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: ordering | Ordering of graph nodes in which the nodes are processed for aggregation. The options are natural, random and graph. - aggregation: max selected neighbors | Maximum number of neighbor nodes which have already been added to aggregates. - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. - */ - - template - class AggregationPhase1Algorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase1Algorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + + @ingroup Aggregation + + ### Idea ### + Phase 1 tries to build new aggregates which fulfill the user chosen aggregation + criteria (i.e. minimum and maximum size of aggregates). Especially the chosen + ordering for the input nodes may have some influence on the final aggregates. + Phase 1 is the most important aggregation routine for building new aggregates. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: ordering | Ordering of graph nodes in which the nodes are processed for aggregation. The options are natural, random and graph. 
+ aggregation: max selected neighbors | Maximum number of neighbor nodes which have already been added to aggregates. + aggregation: min agg size | minimum number of nodes which have to be in an aggregate. + aggregation: max agg size | maximum allowed number of nodes in an aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. +*/ + +template +class AggregationPhase1Algorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase1Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase1Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase1Algorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase1Algorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 1 (main)"; } +}; - std::string description() const { return "Phase 1 (main)"; } - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp index 82e837c0ef41..d69a41b93193 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp @@ -66,221 +66,217 @@ namespace MueLu { - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregates(const 
Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); - int minNodesPerAggregate = params.get ("aggregation: min agg size"); - int maxNodesPerAggregate = params.get ("aggregation: max agg size"); + TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, + Exceptions::RuntimeError, + "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, - Exceptions::RuntimeError, - "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - - // Distance-2 gives less control than serial uncoupled phase 1 - // no custom row reordering because would require making deep copy - // of local matrix entries and permuting it can only enforce - // max aggregate size - { - if(params.get("aggregation: deterministic")) - { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(maxNodesPerAggregate, graph, - aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(maxNodesPerAggregate, graph, - aggregates, aggStat, numNonAggregatedNodes); - } + // Distance-2 gives less control than serial uncoupled phase 1 + // no custom row reordering because would require making deep copy + // of local matrix entries and permuting it can only enforce + // max aggregate size + { + if (params.get("aggregation: deterministic")) { + 
Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(maxNodesPerAggregate, graph, + aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(maxNodesPerAggregate, graph, + aggregates, aggStat, numNonAggregatedNodes); } } +} - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregatesRandom(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - // Extract data from aggregates - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregatesRandom(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numAggregatedNodes = 0; - LO numLocalAggregates = aggregates.GetNumAggregates(); - Kokkos::View aggCount("aggCount"); - Kokkos::deep_copy(aggCount, numLocalAggregates); - Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO nodeIdx) { - if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) { - const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1); - vertex2AggId(nodeIdx, 0) = aggIdx; - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - } - }); - // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates - // before updating the value of numLocalAggregates. 
- // But since we also do not want to create a host mirror of aggCount we do some trickery... - numAggregatedNodes -= numLocalAggregates; - Kokkos::deep_copy(numLocalAggregates, aggCount); - numAggregatedNodes += numLocalAggregates; - - // Compute the initial size of the aggregates. - // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1 - // at this point so we could simplify the code below a lot if this - // assumption is correct... - Kokkos::View aggSizesView("aggSizes", numLocalAggregates); - { - // Here there is a possibility that two vertices assigned to two different threads contribute - // to the same aggregate if somethings happened before phase 1? - auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView); - Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO nodeIdx) { - auto aggSizesScatterViewAccess = aggSizesScatterView.access(); - if(vertex2AggId(nodeIdx, 0) >= 0) - aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1; - }); - Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView); - } + // Extract data from aggregates + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); - LO tmpNumAggregatedNodes = 0; - Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) { - if(colors(nodeIdx) != 1 - && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) { - // Get neighbors of vertex i and look for local, aggregated, - // color 1 neighbor (valid root). 
- auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); - for(LO j = 0; j < neighbors.length; ++j) { - auto nei = neighbors.colidx(j); - if(lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 - && aggStat(nei) == AGGREGATED) { + auto lclLWGraph = graph.getLocalLWGraph(); - // This atomic guarentees that any other node trying to - // join aggregate agg has the correct size. - LO agg = vertex2AggId(nei, 0); - const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg), - 1); - if(aggSize < maxAggSize) { - //assign vertex i to aggregate with root j - vertex2AggId(nodeIdx, 0) = agg; - procWinner(nodeIdx, 0) = myRank; - aggStat(nodeIdx) = AGGREGATED; - ++lNumAggregatedNodes; - break; - } else { - // Decrement back the value of aggSizesView(agg) - Kokkos::atomic_decrement(&aggSizesView(agg)); - } - } - } - } - // if(aggStat(nodeIdx) != AGGREGATED) { - // lNumNonAggregatedNodes++; - if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; } - // } - }, tmpNumAggregatedNodes); - numAggregatedNodes += tmpNumAggregatedNodes; - numNonAggregatedNodes -= numAggregatedNodes; + LO numAggregatedNodes = 0; + LO numLocalAggregates = aggregates.GetNumAggregates(); + Kokkos::View aggCount("aggCount"); + Kokkos::deep_copy(aggCount, numLocalAggregates); + Kokkos::parallel_for( + "Aggregation Phase 1: initial reduction over color == 1", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + if (colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) { + const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); + vertex2AggId(nodeIdx, 0) = aggIdx; + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + } + }); + // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates + // before updating the value of numLocalAggregates. + // But since we also do not want to create a host mirror of aggCount we do some trickery... 
+ numAggregatedNodes -= numLocalAggregates; + Kokkos::deep_copy(numLocalAggregates, aggCount); + numAggregatedNodes += numLocalAggregates; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); + // Compute the initial size of the aggregates. + // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1 + // at this point so we could simplify the code below a lot if this + // assumption is correct... + Kokkos::View aggSizesView("aggSizes", numLocalAggregates); + { + // Here there is a possibility that two vertices assigned to two different threads contribute + // to the same aggregate if somethings happened before phase 1? + auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView); + Kokkos::parallel_for( + "Aggregation Phase 1: compute initial aggregates size", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + auto aggSizesScatterViewAccess = aggSizesScatterView.access(); + if (vertex2AggId(nodeIdx, 0) >= 0) + aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1; + }); + Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView); } - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregatesDeterministic(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + LO tmpNumAggregatedNodes = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 1: main parallel_reduce over aggSizes", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const size_t nodeIdx, LO& lNumAggregatedNodes) { + if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) { + // Get neighbors of vertex i and look for local, aggregated, + // color 1 neighbor (valid root). 
+ auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); + for (LO j = 0; j < neighbors.length; ++j) { + auto nei = neighbors.colidx(j); + if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) == AGGREGATED) { + // This atomic guarentees that any other node trying to + // join aggregate agg has the correct size. + LO agg = vertex2AggId(nei, 0); + const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg), + 1); + if (aggSize < maxAggSize) { + // assign vertex i to aggregate with root j + vertex2AggId(nodeIdx, 0) = agg; + procWinner(nodeIdx, 0) = myRank; + aggStat(nodeIdx) = AGGREGATED; + ++lNumAggregatedNodes; + break; + } else { + // Decrement back the value of aggSizesView(agg) + Kokkos::atomic_decrement(&aggSizesView(agg)); + } + } + } + } + // if(aggStat(nodeIdx) != AGGREGATED) { + // lNumNonAggregatedNodes++; + if (aggStat(nodeIdx) == NOTSEL) { + aggStat(nodeIdx) = READY; + } + // } + }, + tmpNumAggregatedNodes); + numAggregatedNodes += tmpNumAggregatedNodes; + numNonAggregatedNodes -= numAggregatedNodes; - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} - auto lclLWGraph = graph.getLocalLWGraph(); +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregatesDeterministic(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - Kokkos::View numLocalAggregatesView("Num aggregates"); - { - auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView); - h_nla() = numLocalAggregates; - 
Kokkos::deep_copy(numLocalAggregatesView, h_nla); - } + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); - Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); - Kokkos::View numNewRoots("Number of new aggregates of current color"); - auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + auto lclLWGraph = graph.getLocalLWGraph(); - //first loop build the set of new roots - Kokkos::parallel_for("Aggregation Phase 1: building list of new roots", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO i) - { - if(colors(i) == 1 && aggStat(i) == READY) - { - //i will become a root - newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i; - } - }); - Kokkos::deep_copy(h_numNewRoots, numNewRoots); - //sort new roots by LID to guarantee determinism in agg IDs - Kokkos::sort(newRoots, 0, h_numNewRoots()); - LO numAggregated = 0; - Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes", - Kokkos::RangePolicy(0, h_numNewRoots()), - KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) - { - LO root = newRoots(rootIndex); - LO aggID = numLocalAggregatesView() + rootIndex; - LO aggSize = 1; - vertex2AggId(root, 0) = aggID; - procWinner(root, 0) = myRank; - aggStat(root) = AGGREGATED; - auto neighOfRoot = lclLWGraph.getNeighborVertices(root); - for(LO n = 0; n < neighOfRoot.length; n++) - { - LO neigh = neighOfRoot(n); - if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) - { - //add neigh to aggregate - vertex2AggId(neigh, 0) = aggID; - procWinner(neigh, 0) = myRank; - aggStat(neigh) = AGGREGATED; - aggSize++; - if(aggSize == maxAggSize) - { - //can't add any more nodes - break; - } - } - } - lnumAggregated += aggSize; - }, numAggregated); - numNonAggregatedNodes -= numAggregated; - // update aggregate object - 
aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots()); + LO numLocalAggregates = aggregates.GetNumAggregates(); + Kokkos::View numLocalAggregatesView("Num aggregates"); + { + auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView); + h_nla() = numLocalAggregates; + Kokkos::deep_copy(numLocalAggregatesView, h_nla); } -} // end namespace + Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); + Kokkos::View numNewRoots("Number of new aggregates of current color"); + auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + + // first loop build the set of new roots + Kokkos::parallel_for( + "Aggregation Phase 1: building list of new roots", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO i) { + if (colors(i) == 1 && aggStat(i) == READY) { + // i will become a root + newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i; + } + }); + Kokkos::deep_copy(h_numNewRoots, numNewRoots); + // sort new roots by LID to guarantee determinism in agg IDs + Kokkos::sort(newRoots, 0, h_numNewRoots()); + LO numAggregated = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 1: aggregating nodes", + Kokkos::RangePolicy(0, h_numNewRoots()), + KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) { + LO root = newRoots(rootIndex); + LO aggID = numLocalAggregatesView() + rootIndex; + LO aggSize = 1; + vertex2AggId(root, 0) = aggID; + procWinner(root, 0) = myRank; + aggStat(root) = AGGREGATED; + auto neighOfRoot = lclLWGraph.getNeighborVertices(root); + for (LO n = 0; n < neighOfRoot.length; n++) { + LO neigh = neighOfRoot(n); + if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) { + // add neigh to aggregate + vertex2AggId(neigh, 0) = aggID; + procWinner(neigh, 0) = myRank; + aggStat(neigh) = AGGREGATED; + aggSize++; + if (aggSize == maxAggSize) { + // can't add any more nodes + break; + } + } + } + lnumAggregated += aggSize; + }, + numAggregated); + numNonAggregatedNodes -= numAggregated; + // update aggregate object + 
aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots()); +} + +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp index 0535904d2311..735ac8cdcb4e 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp @@ -56,63 +56,60 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationPhase2aAlgorithm class. - @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. - @ingroup Aggregation - - ### Idea ### - Among unaggregated points, see if we can make a reasonable size - aggregate out of it. We do this by looking at neighbors and seeing - how many are unaggregated and on my processor. Loosely, base the - number of new aggregates created on the percentage of unaggregated nodes. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase2aAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase2aAlgorithm class. + @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. + @ingroup Aggregation + + ### Idea ### + Among unaggregated points, see if we can make a reasonable size + aggregate out of it. We do this by looking at neighbors and seeing + how many are unaggregated and on my processor. 
Loosely, base the + number of new aggregates created on the percentage of unaggregated nodes. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: min agg size | minimum number of nodes which have to be in an aggregate. + aggregation: max agg size | maximum allowed number of nodes in an aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase2aAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2aAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2aAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2aAlgorithm() { } + //! Destructor. + virtual ~AggregationPhase2aAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2a (secondary)"; } +}; - std::string description() const { return "Phase 2a (secondary)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp index fdd92afd4f8e..b9f2b394d01d 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ - #include #include @@ -61,139 +60,133 @@ namespace MueLu { - template - void AggregationPhase2aAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - int minNodesPerAggregate = params.get("aggregation: min agg size"); - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); +template +void AggregationPhase2aAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, 
"BuildAggregates"); + + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLbehavior = params.get("aggregation: match ML phase2a"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + LO numLocalNodes = procWinner.size(); + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = params.get("aggregation: phase2a agg factor"); + double factor; + + if (matchMLbehavior) { + // Note: ML uses global counts to set the factor + // Passing # of nonaggregated nodes and # of nodes via aggStat + GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)aggStat.size()}; + GO out_data[2]; + Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data); + GO phase_one_aggregated = out_data[1] - out_data[0]; + factor = as(phase_one_aggregated) / (out_data[1] + 1); + + LO agg_stat_unaggregated = 0; + LO agg_stat_aggregated = 0; + LO agg_stat_bdry = 0; + for (LO i = 0; i < (LO)aggStat.size(); i++) { + if (aggStat[i] == AGGREGATED) + agg_stat_aggregated++; + else if (aggStat[i] == BOUNDARY) + agg_stat_bdry++; + else + agg_stat_unaggregated++; + } - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + // NOTE: ML always uses 3 as minNodesPerAggregate + minNodesPerAggregate = 3; - LO numLocalAggregates = aggregates.GetNumAggregates(); + } else { + // MueLu defaults to using local counts to set the factor + factor = as(numLocalAggregated) / (numLocalNodes + 1); + } - LO numLocalNodes = procWinner.size(); - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + // Now apply aggFactor + 
factor = pow(factor, aggFactor); - const double aggFactor = params.get("aggregation: phase2a agg factor"); - double factor; + int aggIndex = -1; + size_t aggSize = 0; + std::vector aggList(graph.getLocalMaxNumRowEntries()); + for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) { + if (aggStat[rootCandidate] != READY) { + continue; + } + LO numNeighbors = 0; + aggSize = 0; if (matchMLbehavior) { - // Note: ML uses global counts to set the factor - // Passing # of nonaggregated nodes and # of nodes via aggStat - GO in_data[2] ={(GO)numNonAggregatedNodes,(GO) aggStat.size()}; - GO out_data[2]; - Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data); - GO phase_one_aggregated = out_data[1] - out_data[0]; - factor = as(phase_one_aggregated) / (out_data[1]+1); - - LO agg_stat_unaggregated=0; - LO agg_stat_aggregated=0; - LO agg_stat_bdry=0; - for (LO i=0; i<(LO)aggStat.size(); i++) { - if (aggStat[i] == AGGREGATED) - agg_stat_aggregated++; - else if (aggStat[i] == BOUNDARY) - agg_stat_bdry++; - else - agg_stat_unaggregated++; - } - - // NOTE: ML always uses 3 as minNodesPerAggregate - minNodesPerAggregate=3; - - } - else { - // MueLu defaults to using local counts to set the factor - factor = as(numLocalAggregated)/(numLocalNodes+1); + aggList[aggSize++] = rootCandidate; + numNeighbors++; } - // Now apply aggFactor - factor = pow(factor, aggFactor); - - int aggIndex = -1; - size_t aggSize = 0; - std::vector aggList(graph.getLocalMaxNumRowEntries()); - - for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) { - if (aggStat[rootCandidate] != READY) { - continue; - } + ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); + + LO num_nonaggd_neighbors = 0, num_local_neighbors = 0; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + if (graph.isLocalNeighborVertex(neigh)) + num_local_neighbors++; + + if (neigh != rootCandidate) { + if (graph.isLocalNeighborVertex(neigh) && 
aggStat[neigh] == READY) { + // If aggregate size does not exceed max size, add node to the tentative aggregate + // NOTE: We do not exit the loop over all neighbours since we have still + // to count all aggregated neighbour nodes for the aggregation criteria + // NOTE: We check here for the maximum aggregation size. If we would do it below + // with all the other check too big aggregates would not be accepted at all. + if (aggSize < as(maxNodesPerAggregate)) + aggList[aggSize++] = neigh; + num_nonaggd_neighbors++; + } - LO numNeighbors = 0; - aggSize = 0; - if (matchMLbehavior) { - aggList[aggSize++] = rootCandidate; numNeighbors++; } + } - ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); - - LO num_nonaggd_neighbors=0, num_local_neighbors=0; - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - if (graph.isLocalNeighborVertex(neigh)) - num_local_neighbors++; - + bool accept_aggregate; + if (matchMLbehavior) { + // ML does this calculation slightly differently than MueLu does by default, specifically it + // uses the *local* number of neigbors, regardless of what they are. + // NOTE: ML does zero compression here. Not sure if it matters + // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. 
This has been set above + LO rowi_N = num_local_neighbors; + num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor + accept_aggregate = (rowi_N > as(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N)); + } else { + accept_aggregate = (aggSize > as(minNodesPerAggregate)) && (aggSize > factor * numNeighbors); + } - if (neigh != rootCandidate) { - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { - // If aggregate size does not exceed max size, add node to the tentative aggregate - // NOTE: We do not exit the loop over all neighbours since we have still - // to count all aggregated neighbour nodes for the aggregation criteria - // NOTE: We check here for the maximum aggregation size. If we would do it below - // with all the other check too big aggregates would not be accepted at all. - if (aggSize < as(maxNodesPerAggregate)) - aggList[aggSize++] = neigh; - num_nonaggd_neighbors++; - } + if (accept_aggregate) { + // Accept new aggregate + // rootCandidate becomes the root of the newly formed aggregate + aggregates.SetIsRoot(rootCandidate); + aggIndex = numLocalAggregates++; - numNeighbors++; - } + for (size_t k = 0; k < aggSize; k++) { + aggStat[aggList[k]] = AGGREGATED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; } - - bool accept_aggregate; - if (matchMLbehavior) { - // ML does this calculation slightly differently than MueLu does by default, specifically it - // uses the *local* number of neigbors, regardless of what they are. - // NOTE: ML does zero compression here. Not sure if it matters - // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. 
This has been set above - LO rowi_N = num_local_neighbors; - num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor - accept_aggregate = (rowi_N > as(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor*rowi_N)); - } - else { - accept_aggregate = (aggSize > as(minNodesPerAggregate)) && (aggSize > factor*numNeighbors); - } - - - if (accept_aggregate) { - // Accept new aggregate - // rootCandidate becomes the root of the newly formed aggregate - aggregates.SetIsRoot(rootCandidate); - aggIndex = numLocalAggregates++; - - for (size_t k = 0; k < aggSize; k++) { - aggStat [aggList[k]] = AGGREGATED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner [aggList[k]] = myRank; - } - - numNonAggregatedNodes -= aggSize; - } + numNonAggregatedNodes -= aggSize; } - - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); } -} // end namespace + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} + +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp index d253e891cd71..b5487d78812f 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp @@ -57,81 +57,79 @@ #include "MueLu_FactoryBase_fwd.hpp" namespace MueLu { - /*! - @class AggregationPhase2aAlgorithm class. - @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. - @ingroup Aggregation - - ### Idea ### - Among unaggregated points, see if we can make a reasonable size - aggregate out of it. We do this by looking at neighbors and seeing - how many are unaggregated and on my processor. 
Loosely, base the - number of new aggregates created on the percentage of unaggregated nodes. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase2aAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase2aAlgorithm class. + @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. + @ingroup Aggregation + + ### Idea ### + Among unaggregated points, see if we can make a reasonable size + aggregate out of it. We do this by looking at neighbors and seeing + how many are unaggregated and on my processor. Loosely, base the + number of new aggregates created on the percentage of unaggregated nodes. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: min agg size | minimum number of nodes which have to be in an aggregate. + aggregation: max agg size | maximum allowed number of nodes in an aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase2aAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! 
Constructor. - AggregationPhase2aAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2aAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2aAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase2aAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2a (secondary)"; } +}; - std::string description() const { return "Phase 2a (secondary)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DECL_HPP 
diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp index 01fcb20a21b9..d016b747c7bc 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp @@ -62,268 +62,266 @@ namespace MueLu { - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } - } // BuildAggregates - - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const int minNodesPerAggregate = params.get("aggregation: min agg size"); - const int maxNodesPerAggregate = params.get("aggregation: max agg 
size"); - bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numLocalNodes = numRows; - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; - - const double aggFactor = 0.5; - double factor = static_cast(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - // LBV on Sept 12, 2019: this looks a little heavy handed, - // I'm not sure a view is needed to perform atomic updates. - // If we can avoid this and use a simple LO that would be - // simpler for later maintenance. - Kokkos::View numLocalAggregates("numLocalAggregates"); - typename Kokkos::View::HostMirror h_numLocalAggregates = +} // BuildAggregates + +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const int minNodesPerAggregate = params.get("aggregation: min agg size"); + const int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLbehavior = params.get("aggregation: match ML phase2a"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = 
graph.getLocalLWGraph(); + + LO numLocalNodes = numRows; + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = 0.5; + double factor = static_cast(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); + + // LBV on Sept 12, 2019: this looks a little heavy handed, + // I'm not sure a view is needed to perform atomic updates. + // If we can avoid this and use a simple LO that would be + // simpler for later maintenance. + Kokkos::View numLocalAggregates("numLocalAggregates"); + typename Kokkos::View::HostMirror h_numLocalAggregates = Kokkos::create_mirror_view(numLocalAggregates); - h_numLocalAggregates() = aggregates.GetNumAggregates(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + h_numLocalAggregates() = aggregates.GetNumAggregates(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + + // Now we create new aggregates using root nodes in all colors other than the first color, + // as the first color was already exhausted in Phase 1. 
+ for (int color = 2; color < numColors + 1; ++color) { + LO tmpNumNonAggregatedNodes = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 2a: loop over each individual color", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) { + if (aggStat(rootCandidate) == READY && + colors(rootCandidate) == color) { + LO numNeighbors = 0; + LO aggSize = 0; + if (matchMLbehavior) { + aggSize += 1; + numNeighbors += 1; + } + + auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); + + // Loop over neighbors to count how many nodes could join + // the new aggregate + + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == READY) && + (aggSize < maxNodesPerAggregate)) { + ++aggSize; + } + ++numNeighbors; + } + } + + // If a sufficient number of nodes can join the new aggregate + // then we actually create the aggregate. + if (aggSize > minNodesPerAggregate && + (aggSize > factor * numNeighbors)) { + // aggregates.SetIsRoot(rootCandidate); + LO aggIndex = Kokkos:: + atomic_fetch_add(&numLocalAggregates(), 1); + + LO numAggregated = 0; + + if (matchMLbehavior) { + // Add the root. 
+ aggStat(rootCandidate) = AGGREGATED; + vertex2AggId(rootCandidate, 0) = aggIndex; + procWinner(rootCandidate, 0) = myRank; + ++numAggregated; + --lNumNonAggregatedNodes; + } + + for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) { + LO neigh = neighbors(neighIdx); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == READY) && + (numAggregated < aggSize)) { + aggStat(neigh) = AGGREGATED; + vertex2AggId(neigh, 0) = aggIndex; + procWinner(neigh, 0) = myRank; + + ++numAggregated; + --lNumNonAggregatedNodes; + } + } + } + } + } + }, + tmpNumNonAggregatedNodes); + numNonAggregatedNodes += tmpNumNonAggregatedNodes; + } - // Now we create new aggregates using root nodes in all colors other than the first color, - // as the first color was already exhausted in Phase 1. - for(int color = 2; color < numColors + 1; ++color) { - LO tmpNumNonAggregatedNodes = 0; - Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) { - if(aggStat(rootCandidate) == READY && - colors(rootCandidate) == color) { - - LO numNeighbors = 0; - LO aggSize = 0; - if (matchMLbehavior) { - aggSize += 1; - numNeighbors +=1; - } - - auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); - - // Loop over neighbors to count how many nodes could join - // the new aggregate - - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != rootCandidate) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - (aggStat(neigh) == READY) && - (aggSize < maxNodesPerAggregate)) { - ++aggSize; - } - ++numNeighbors; - } - } - - // If a sufficient number of nodes can join the new aggregate - // then we actually create the aggregate. 
- if(aggSize > minNodesPerAggregate && - (aggSize > factor*numNeighbors)) { - - // aggregates.SetIsRoot(rootCandidate); - LO aggIndex = Kokkos:: - atomic_fetch_add(&numLocalAggregates(), 1); - - LO numAggregated = 0; - - if (matchMLbehavior) { - // Add the root. - aggStat(rootCandidate) = AGGREGATED; - vertex2AggId(rootCandidate, 0) = aggIndex; - procWinner(rootCandidate, 0) = myRank; - ++numAggregated; - --lNumNonAggregatedNodes; - } - - for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) { - LO neigh = neighbors(neighIdx); - if(neigh != rootCandidate) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - (aggStat(neigh) == READY) && - (numAggregated < aggSize)) { - aggStat(neigh) = AGGREGATED; - vertex2AggId(neigh, 0) = aggIndex; - procWinner(neigh, 0) = myRank; - - ++numAggregated; - --lNumNonAggregatedNodes; - } - } - } - } - } - }, tmpNumNonAggregatedNodes); - numNonAggregatedNodes += tmpNumNonAggregatedNodes; - } + // update aggregate object + Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates); + aggregates.SetNumAggregates(h_numLocalAggregates()); +} // BuildAggregatesRandom + +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregatesDeterministic(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const int minNodesPerAggregate = params.get("aggregation: min agg size"); + const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - // update aggregate object - Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates); - aggregates.SetNumAggregates(h_numLocalAggregates()); - } // BuildAggregatesRandom - - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const int minNodesPerAggregate = params.get("aggregation: min agg size"); - 
const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numLocalNodes = procWinner.size(); - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; - - const double aggFactor = 0.5; - double factor = as(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - Kokkos::View numLocalAggregates("numLocalAggregates"); - typename Kokkos::View::HostMirror h_numLocalAggregates = + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + LO numLocalNodes = procWinner.size(); + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = 0.5; + double factor = as(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); + + Kokkos::View numLocalAggregates("numLocalAggregates"); + typename Kokkos::View::HostMirror h_numLocalAggregates = Kokkos::create_mirror_view(numLocalAggregates); - h_numLocalAggregates() = aggregates.GetNumAggregates(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); - - // Now we create new aggregates using root nodes in all colors other than the first color, - // as the first color was already exhausted in Phase 1. 
- // - // In the deterministic version, exactly the same set of aggregates will be created - // (as the nondeterministic version) - // because no vertex V can be a neighbor of two vertices of the same color, so two root - // candidates can't fight over V - // - // But, the precise values in vertex2AggId need to match exactly, so just sort the new - // roots of each color before assigning aggregate IDs - - //numNonAggregatedNodes is the best available upper bound for the number of aggregates - //which may be created in this phase, so use it for the size of newRoots - Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); - Kokkos::View numNewRoots("Number of new aggregates of current color"); - auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); - for(int color = 1; color < numColors + 1; ++color) { - h_numNewRoots() = 0; - Kokkos::deep_copy(numNewRoots, h_numNewRoots); - Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO rootCandidate) { - if(aggStat(rootCandidate) == READY && - colors(rootCandidate) == color) { - LO aggSize = 0; - auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); - // Loop over neighbors to count how many nodes could join - // the new aggregate - LO numNeighbors = 0; - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != rootCandidate) - { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == READY && - aggSize < maxNodesPerAggregate) - { - ++aggSize; - } - ++numNeighbors; - } - } - // If a sufficient number of nodes can join the new aggregate - // then we mark rootCandidate as a future root. 
- if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) { - LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1); - newRoots(newRootIndex) = rootCandidate; - } - } - }); - Kokkos::deep_copy(h_numNewRoots, numNewRoots); - - if(h_numNewRoots() > 0) { - //sort the new root indices - Kokkos::sort(newRoots, 0, h_numNewRoots()); - //now, loop over all new roots again and actually create the aggregates - LO tmpNumNonAggregatedNodes = 0; - //First, just find the set of color vertices which will become aggregate roots - Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates", - Kokkos::RangePolicy(0, h_numNewRoots()), - KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) { - LO root = newRoots(newRootIndex); - LO newAggID = numLocalAggregates() + newRootIndex; - auto neighbors = lclLWGraph.getNeighborVertices(root); - // Loop over neighbors and add them to new aggregate - aggStat(root) = AGGREGATED; - vertex2AggId(root, 0) = newAggID; - LO aggSize = 1; - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != root) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == READY && - aggSize < maxNodesPerAggregate) { - aggStat(neigh) = AGGREGATED; - vertex2AggId(neigh, 0) = newAggID; - procWinner(neigh, 0) = myRank; - aggSize++; - } - } - } - lNumNonAggregatedNodes -= aggSize; - }, tmpNumNonAggregatedNodes); - numNonAggregatedNodes += tmpNumNonAggregatedNodes; - h_numLocalAggregates() += h_numNewRoots(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); - } + h_numLocalAggregates() = aggregates.GetNumAggregates(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + + // Now we create new aggregates using root nodes in all colors other than the first color, + // as the first color was already exhausted in Phase 1. 
+ // + // In the deterministic version, exactly the same set of aggregates will be created + // (as the nondeterministic version) + // because no vertex V can be a neighbor of two vertices of the same color, so two root + // candidates can't fight over V + // + // But, the precise values in vertex2AggId need to match exactly, so just sort the new + // roots of each color before assigning aggregate IDs + + // numNonAggregatedNodes is the best available upper bound for the number of aggregates + // which may be created in this phase, so use it for the size of newRoots + Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); + Kokkos::View numNewRoots("Number of new aggregates of current color"); + auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + for (int color = 1; color < numColors + 1; ++color) { + h_numNewRoots() = 0; + Kokkos::deep_copy(numNewRoots, h_numNewRoots); + Kokkos::parallel_for( + "Aggregation Phase 2a: determining new roots of current color", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO rootCandidate) { + if (aggStat(rootCandidate) == READY && + colors(rootCandidate) == color) { + LO aggSize = 0; + auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); + // Loop over neighbors to count how many nodes could join + // the new aggregate + LO numNeighbors = 0; + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY && + aggSize < maxNodesPerAggregate) { + ++aggSize; + } + ++numNeighbors; + } + } + // If a sufficient number of nodes can join the new aggregate + // then we mark rootCandidate as a future root. 
+ if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) { + LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1); + newRoots(newRootIndex) = rootCandidate; + } + } + }); + Kokkos::deep_copy(h_numNewRoots, numNewRoots); + + if (h_numNewRoots() > 0) { + // sort the new root indices + Kokkos::sort(newRoots, 0, h_numNewRoots()); + // now, loop over all new roots again and actually create the aggregates + LO tmpNumNonAggregatedNodes = 0; + // First, just find the set of color vertices which will become aggregate roots + Kokkos::parallel_reduce( + "Aggregation Phase 2a: create new aggregates", + Kokkos::RangePolicy(0, h_numNewRoots()), + KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) { + LO root = newRoots(newRootIndex); + LO newAggID = numLocalAggregates() + newRootIndex; + auto neighbors = lclLWGraph.getNeighborVertices(root); + // Loop over neighbors and add them to new aggregate + aggStat(root) = AGGREGATED; + vertex2AggId(root, 0) = newAggID; + LO aggSize = 1; + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != root) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY && + aggSize < maxNodesPerAggregate) { + aggStat(neigh) = AGGREGATED; + vertex2AggId(neigh, 0) = newAggID; + procWinner(neigh, 0) = myRank; + aggSize++; + } + } + } + lNumNonAggregatedNodes -= aggSize; + }, + tmpNumNonAggregatedNodes); + numNonAggregatedNodes += tmpNumNonAggregatedNodes; + h_numLocalAggregates() += h_numNewRoots(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); } - aggregates.SetNumAggregates(h_numLocalAggregates()); } + aggregates.SetNumAggregates(h_numLocalAggregates()); +} -} // end namespace +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp index 88b156d23c14..a38f0fe0f310 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp @@ -56,62 +56,59 @@ #include "MueLu_AggregationPhase2bAlgorithm_fwd.hpp" namespace MueLu { - /*! - @class AggregationPhase2bAlgorithm class. - @brief Add leftovers to existing aggregates - @ingroup Aggregation - - ### Idea ### - In phase 2b non-aggregated nodes are added to existing aggregates. - All neighbors of the unaggregated node are checked and the corresponding - aggregate weight is increased. The unaggregated node is added to the aggregate - with the best weight. A simple penalty strategy makes sure that the non-aggregated - nodes are added to different aggregates. - The routine runs twice to cover non-aggregate nodes which have a node distance - of two to existing aggregates. Assuming that the node distance is not greater - than 3 (the aggregate diameter size), running the algorithm only twice should - be sufficient. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. There are no aggregation criteria considered. Especially the aggregation: max agg size criterion is ignored. - This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. - */ - - template - class AggregationPhase2bAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase2bAlgorithm class. + @brief Add leftovers to existing aggregates + @ingroup Aggregation + + ### Idea ### + In phase 2b non-aggregated nodes are added to existing aggregates. + All neighbors of the unaggregated node are checked and the corresponding + aggregate weight is increased. The unaggregated node is added to the aggregate + with the best weight. 
A simple penalty strategy makes sure that the non-aggregated + nodes are added to different aggregates. + The routine runs twice to cover non-aggregate nodes which have a node distance + of two to existing aggregates. Assuming that the node distance is not greater + than 3 (the aggregate diameter size), running the algorithm only twice should + be sufficient. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. There are no aggregation criteria considered. Especially the aggregation: max agg size criterion is ignored. + This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. +*/ + +template +class AggregationPhase2bAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2bAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2bAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2bAlgorithm() { } + //! Destructor. + virtual ~AggregationPhase2bAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2b (expansion)"; } +}; - std::string description() const { return "Phase 2b (expansion)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp index 1dc4ac52c9f1..4e402903e6de 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp @@ -60,91 +60,89 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big - template - void AggregationPhase2bAlgorithm::BuildAggregates(const ParameterList& params , const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - bool matchMLbehavior = params.get("aggregation: match ML phase2b"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - std::vector aggWeight (numLocalAggregates, 0); - std::vector connectWeight(numRows, defaultConnectWeight); - 
std::vector aggPenalties (numRows, 0); - - // We do this cycle twice. - // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - for (int k = 0; k < 2; k++) { - for (LO i = 0; i < numRows; i++) { - if (aggStat[i] != READY) - continue; - - ArrayView neighOfINode = graph.getNeighborVertices(i); - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED) - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) - aggWeight[vertex2AggId[neigh]] += connectWeight[neigh]; - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - int aggId = vertex2AggId[neigh]; +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big +template +void AggregationPhase2bAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + bool matchMLbehavior = params.get("aggregation: match ML phase2b"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + const int defaultConnectWeight = 100; + const int penaltyConnectWeight = 10; + + std::vector aggWeight(numLocalAggregates, 0); + std::vector connectWeight(numRows, defaultConnectWeight); + 
std::vector aggPenalties(numRows, 0); + + // We do this cycle twice. + // I don't know why, but ML does it too + // taw: by running the aggregation routine more than once there is a chance that also + // non-aggregated nodes with a node distance of two are added to existing aggregates. + // Assuming that the aggregate size is 3 in each direction running the algorithm only twice + // should be sufficient. + for (int k = 0; k < 2; k++) { + for (LO i = 0; i < numRows; i++) { + if (aggStat[i] != READY) + continue; + + ArrayView neighOfINode = graph.getNeighborVertices(i); + + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED) + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) + aggWeight[vertex2AggId[neigh]] += connectWeight[neigh]; + } - // Note: The third condition is only relevant if the ML matching is enabled - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED - && (!matchMLbehavior || aggWeight[aggId] != 0) ) { + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; - int score = aggWeight[aggId] - aggPenalties[aggId]; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + int aggId = vertex2AggId[neigh]; - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight[neigh]; + // Note: The third condition is only relevant if the ML matching is enabled + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) { + int score = aggWeight[aggId] - aggPenalties[aggId]; - } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) { - bestConnect = connectWeight[neigh]; - } + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight[neigh]; - // Reset the weights for the next loop - aggWeight[aggId] = 0; + } else if 
(aggId == bestAggId && connectWeight[neigh] > bestConnect) { + bestConnect = connectWeight[neigh]; } + + // Reset the weights for the next loop + aggWeight[aggId] = 0; } + } - if (bestScore >= 0) { - aggStat [i] = AGGREGATED; - vertex2AggId[i] = bestAggId; - procWinner [i] = myRank; + if (bestScore >= 0) { + aggStat[i] = AGGREGATED; + vertex2AggId[i] = bestAggId; + procWinner[i] = myRank; - numNonAggregatedNodes--; + numNonAggregatedNodes--; - aggPenalties[bestAggId]++; - connectWeight[i] = bestConnect - penaltyConnectWeight; - } + aggPenalties[bestAggId]++; + connectWeight[i] = bestConnect - penaltyConnectWeight; } } } +} -} // end namespace +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp index 941f732e64d0..9172487c9ca1 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp @@ -58,80 +58,78 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class AggregationPhase2bAlgorithm class. - @brief Add leftovers to existing aggregates - @ingroup Aggregation - - ### Idea ### - In phase 2b non-aggregated nodes are added to existing aggregates. - All neighbors of the unaggregated node are checked and the corresponding - aggregate weight is increased. The unaggregated node is added to the aggregate - with the best weight. A simple penalty strategy makes sure that the non-aggregated - nodes are added to different aggregates. - The routine runs twice to cover non-aggregate nodes which have a node distance - of two to existing aggregates. 
Assuming that the node distance is not greater - than 3 (the aggregate diameter size), running the algorithm only twice should - be sufficient. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. There are no aggregation criteria considered. Especially the aggregation: max agg size criterion is ignored. - This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. - */ - - template - class AggregationPhase2bAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase2bAlgorithm class. + @brief Add leftovers to existing aggregates + @ingroup Aggregation + + ### Idea ### + In phase 2b non-aggregated nodes are added to existing aggregates. + All neighbors of the unaggregated node are checked and the corresponding + aggregate weight is increased. The unaggregated node is added to the aggregate + with the best weight. A simple penalty strategy makes sure that the non-aggregated + nodes are added to different aggregates. + The routine runs twice to cover non-aggregate nodes which have a node distance + of two to existing aggregates. Assuming that the node distance is not greater + than 3 (the aggregate diameter size), running the algorithm only twice should + be sufficient. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. There are no aggregation criteria considered. Especially the aggregation: max agg size criterion is ignored. + This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. 
+*/ + +template +class AggregationPhase2bAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2bAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2bAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2bAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase2bAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2b (expansion)"; } +}; - std::string description() const { return "Phase 2b (expansion)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp index bccdbf7f98ca..6168904ee0ea 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp @@ -60,179 +60,180 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if 
they are - // not already too big - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } + +} // BuildAggregates + +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + const LO numLocalAggregates = aggregates.GetNumAggregates(); + + auto lclLWGraph = 
graph.getLocalLWGraph(); + + const LO defaultConnectWeight = 100; + const LO penaltyConnectWeight = 10; + + Kokkos::View aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop + Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); + Kokkos::View aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here + + Kokkos::deep_copy(connectWeight, defaultConnectWeight); + + // taw: by running the aggregation routine more than once there is a chance that also + // non-aggregated nodes with a node distance of two are added to existing aggregates. + // Assuming that the aggregate size is 3 in each direction running the algorithm only twice + // should be sufficient. + // lbv: If the prior phase of aggregation where run without specifying an aggregate size, + // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration + // is needed to reach distance 2 neighbors. 
+ int maxIters = 2; + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + if (maxNodesPerAggregate == std::numeric_limits::max()) { + maxIters = 1; + } + for (int iter = 0; iter < maxIters; ++iter) { + for (LO color = 1; color <= numColors; ++color) { + Kokkos::deep_copy(aggWeight, 0); + + // the reduce counts how many nodes are aggregated by this phase, + // which will then be subtracted from numNonAggregatedNodes + LO numAggregated = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) { + if (aggStat(i) != READY || colors(i) != color) + return; + + auto neighOfINode = lclLWGraph.getNeighborVertices(i); + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); - } // BuildAggregates - - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - const LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - const LO defaultConnectWeight = 100; - const LO penaltyConnectWeight = 10; - - Kokkos::View aggWeight (Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop - Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); - Kokkos::View aggPenalties ("aggPenalties", numLocalAggregates);// This gets 
initialized to zero here - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); - - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - // lbv: If the prior phase of aggregation where run without specifying an aggregate size, - // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration - // is needed to reach distance 2 neighbors. - int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if(maxNodesPerAggregate == std::numeric_limits::max()) {maxIters = 1;} - for (int iter = 0; iter < maxIters; ++iter) { - for(LO color = 1; color <= numColors; ++color) { - Kokkos::deep_copy(aggWeight, 0); - - //the reduce counts how many nodes are aggregated by this phase, - //which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) { - if (aggStat(i) != READY || colors(i) != color) - return; - - auto neighOfINode = lclLWGraph.getNeighborVertices(i); - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - // We don't check (neigh != i), as it is covered by checking - // (aggStat[neigh] == AGGREGATED) - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) { - auto aggId = vertex2AggId(neigh, 0); - int score = 
aggWeight(aggId) - aggPenalties(aggId); - - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight(neigh); - - } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { - bestConnect = connectWeight(neigh); - } - } - } - if (bestScore >= 0) { - aggStat(i) = AGGREGATED; - vertex2AggId(i, 0) = bestAggId; - procWinner(i, 0) = myRank; - - Kokkos::atomic_add(&aggPenalties(bestAggId), 1); - connectWeight(i) = bestConnect - penaltyConnectWeight; - tmpNumAggregated++; - } - }, numAggregated); //parallel_for - numNonAggregatedNodes -= numAggregated; - } - } // loop over maxIters - - } // BuildAggregatesRandom - - - - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - Kokkos::View connectWeight (Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); - Kokkos::View aggWeight (Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates);// This gets re-initialized at the start of each "color" loop - Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates); - Kokkos::View aggPenalties ("aggPenalties", numLocalAggregates); - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); - - // We do this cycle twice. 
- // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if(maxNodesPerAggregate == std::numeric_limits::max()) {maxIters = 1;} - for (int iter = 0; iter < maxIters; ++iter) { - for(LO color = 1; color <= numColors; color++) { - Kokkos::deep_copy(aggWeight, 0); - - //the reduce counts how many nodes are aggregated by this phase, - //which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights", + // We don't check (neigh != i), as it is covered by checking + // (aggStat[neigh] == AGGREGATED) + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == AGGREGATED) + Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), + connectWeight(neigh)); + } + + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; + + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); + + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == AGGREGATED) { + auto aggId = vertex2AggId(neigh, 0); + int score = aggWeight(aggId) - aggPenalties(aggId); + + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight(neigh); + + } else if (aggId == bestAggId && + connectWeight(neigh) > bestConnect) { + bestConnect = connectWeight(neigh); + } + } + } + if (bestScore >= 0) { + aggStat(i) = AGGREGATED; + vertex2AggId(i, 0) = bestAggId; + procWinner(i, 0) = myRank; + + Kokkos::atomic_add(&aggPenalties(bestAggId), 1); + connectWeight(i) = bestConnect - penaltyConnectWeight; + tmpNumAggregated++; + } + }, + numAggregated); // parallel_for + 
numNonAggregatedNodes -= numAggregated; + } + } // loop over maxIters + +} // BuildAggregatesRandom + +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregatesDeterministic(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + LO numLocalAggregates = aggregates.GetNumAggregates(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + const int defaultConnectWeight = 100; + const int penaltyConnectWeight = 10; + + Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); + Kokkos::View aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop + Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates); + Kokkos::View aggPenalties("aggPenalties", numLocalAggregates); + + Kokkos::deep_copy(connectWeight, defaultConnectWeight); + + // We do this cycle twice. + // I don't know why, but ML does it too + // taw: by running the aggregation routine more than once there is a chance that also + // non-aggregated nodes with a node distance of two are added to existing aggregates. + // Assuming that the aggregate size is 3 in each direction running the algorithm only twice + // should be sufficient. 
+ int maxIters = 2; + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + if (maxNodesPerAggregate == std::numeric_limits::max()) { + maxIters = 1; + } + for (int iter = 0; iter < maxIters; ++iter) { + for (LO color = 1; color <= numColors; color++) { + Kokkos::deep_copy(aggWeight, 0); + + // the reduce counts how many nodes are aggregated by this phase, + // which will then be subtracted from numNonAggregatedNodes + LO numAggregated = 0; + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg weights", Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i) - { + KOKKOS_LAMBDA(const LO i) { if (aggStat(i) != READY || colors(i) != color) return; auto neighOfINode = lclLWGraph.getNeighborVertices(i); @@ -242,15 +243,15 @@ namespace MueLu { // (aggStat[neigh] == AGGREGATED) if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); + Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), + connectWeight(neigh)); } }); - Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::parallel_reduce( + "Aggregation Phase 2b: aggregates expansion", Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) - { + KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) { if (aggStat(i) != READY || colors(i) != color) return; int bestScore = -100000; @@ -264,7 +265,7 @@ namespace MueLu { if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == AGGREGATED) { auto aggId = vertex2AggId(neigh, 0); - int score = aggWeight(aggId) - aggPenalties(aggId); + int score = aggWeight(aggId) - aggPenalties(aggId); if (score > bestScore) { bestAggId = aggId; @@ -272,7 +273,7 @@ namespace MueLu { bestConnect = connectWeight(neigh); } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { + connectWeight(neigh) > bestConnect) { bestConnect = connectWeight(neigh); } } @@ -286,19 +287,20 @@ namespace MueLu { 
connectWeight(i) = bestConnect - penaltyConnectWeight; tmpNumAggregated++; } - }, numAggregated); //parallel_reduce + }, + numAggregated); // parallel_reduce - Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties", + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg penalties", Kokkos::RangePolicy(0, numLocalAggregates), - KOKKOS_LAMBDA (const LO agg) - { + KOKKOS_LAMBDA(const LO agg) { aggPenalties(agg) += aggPenaltyUpdates(agg); aggPenaltyUpdates(agg) = 0; }); - numNonAggregatedNodes -= numAggregated; - } - } // loop over k - } // BuildAggregatesDeterministic -} // end namespace + numNonAggregatedNodes -= numAggregated; + } + } // loop over k +} // BuildAggregatesDeterministic +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp index 20e5fc8a7222..cfe035693b80 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp @@ -55,58 +55,55 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationPhase3Algorithm class. - @brief Handle leftover nodes. Try to avoid singleton nodes - @ingroup Aggregation - - ### Idea ### - In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. - We try to avoid singletons: we first try to build a new aggregate containing - all neighboring non-aggregated nodes. If we cannot build a new aggregate, - we add the non-aggregated node to the first adjacent aggregate. - Only if there is no adjacent aggregate, we create a singleton node aggregate. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. 
- - */ - - template - class AggregationPhase3Algorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase3Algorithm class. + @brief Handle leftover nodes. Try to avoid singleton nodes + @ingroup Aggregation + + ### Idea ### + In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. + We try to avoid singletons: we first try to build a new aggregate containing + all neighboring non-aggregated nodes. If we cannot build a new aggregate, + we add the non-aggregated node to the first adjacent aggregate. + Only if there is no adjacent aggregate, we create a singleton node aggregate. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase3Algorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase3Algorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase3Algorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase3Algorithm() { } + //! Destructor. + virtual ~AggregationPhase3Algorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 3 (cleanup)"; } +}; - std::string description() const { return "Phase 3 (cleanup)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE3ALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp index ac462bcccf4b..e958226e5c4f 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp @@ -60,150 +60,147 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. 
Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - bool makeNonAdjAggs = false; - bool error_on_isolated = false; - if(params.isParameter("aggregation: error on nodes with no on-rank neighbors")) - error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); - if(params.isParameter("aggregation: phase3 avoid singletons")) - makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); + bool makeNonAdjAggs = false; + bool error_on_isolated = false; + if (params.isParameter("aggregation: error on nodes with no on-rank neighbors")) + error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); + if (params.isParameter("aggregation: phase3 avoid singletons")) + makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); - size_t numSingletons=0; + size_t numSingletons = 0; - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - LO numLocalAggregates = aggregates.GetNumAggregates(); + LO numLocalAggregates = aggregates.GetNumAggregates(); - for (LO i = 0; i < numRows; i++) { - if (aggStat[i] == AGGREGATED || aggStat[i] == IGNORED) - continue; + for (LO i = 0; i < numRows; i++) { + if (aggStat[i] == AGGREGATED || aggStat[i] == IGNORED) + continue; - ArrayView neighOfINode = graph.getNeighborVertices(i); + ArrayView neighOfINode = 
graph.getNeighborVertices(i); - // We don't want a singleton. So lets see if there is an unaggregated - // neighbor that we can also put with this point. - bool isNewAggregate = false; - bool failedToAggregate = true; - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; + // We don't want a singleton. So lets see if there is an unaggregated + // neighbor that we can also put with this point. + bool isNewAggregate = false; + bool failedToAggregate = true; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; - if (neigh != i && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { - isNewAggregate = true; + if (neigh != i && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { + isNewAggregate = true; - aggStat [neigh] = AGGREGATED; - vertex2AggId[neigh] = numLocalAggregates; - procWinner [neigh] = myRank; + aggStat[neigh] = AGGREGATED; + vertex2AggId[neigh] = numLocalAggregates; + procWinner[neigh] = myRank; - numNonAggregatedNodes--; - } + numNonAggregatedNodes--; } + } - if (isNewAggregate) { - // Create new aggregate (not singleton) - aggStat [i] = AGGREGATED; - procWinner [i] = myRank; - numNonAggregatedNodes--; - aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; + if (isNewAggregate) { + // Create new aggregate (not singleton) + aggStat[i] = AGGREGATED; + procWinner[i] = myRank; + numNonAggregatedNodes--; + aggregates.SetIsRoot(i); + vertex2AggId[i] = numLocalAggregates++; + + failedToAggregate = false; + } else { + // We do not want a singleton, but there are no non-aggregated + // neighbors. Lets see if we can connect to any other aggregates + // NOTE: This is very similar to phase 2b, but simplier: we stop with + // the first found aggregate + int j = 0; + for (; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; - failedToAggregate = false; - } else { - // We do not want a singleton, but there are no non-aggregated - // neighbors. 
Lets see if we can connect to any other aggregates - // NOTE: This is very similar to phase 2b, but simplier: we stop with - // the first found aggregate - int j = 0; - for (; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - // We don't check (neigh != rootCandidate), as it is covered by checking (aggStat[neigh] == AGGREGATED) - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) - break; - } - - if (j < neighOfINode.size()) { - // Assign to an adjacent aggregate - vertex2AggId[i] = vertex2AggId[neighOfINode[j]]; - numNonAggregatedNodes--; - failedToAggregate = false; - } + // We don't check (neigh != rootCandidate), as it is covered by checking (aggStat[neigh] == AGGREGATED) + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) + break; } - if (failedToAggregate && makeNonAdjAggs) { - // it we are still didn't find an aggregate home for i (i.e., we have - // a potential singleton), we are desperate. Basically, we seek to - // group i with any other local point to form an aggregate (even if - // it is not a neighbor of i. Either we find a vertex that is already - // aggregated or not aggregated. 
- // 1) if found vertex is aggregated, then assign i to this aggregate - // 2) if found vertex is not aggregated, create new aggregate - - - for (LO ii = 0; ii < numRows; ii++) { // look for anyone else - if ( (ii != i) && (aggStat[ii] != IGNORED) ) { - failedToAggregate = false; // found someone so start - aggStat[i] = AGGREGATED; // marking i as aggregated - procWinner[i]= myRank; - - if (aggStat[ii] == AGGREGATED) - vertex2AggId[i] = vertex2AggId[ii]; - else { - vertex2AggId[i] = numLocalAggregates; - vertex2AggId[ii] = numLocalAggregates; - aggStat [ii] = AGGREGATED; - procWinner [ii] = myRank; - numNonAggregatedNodes--; // acounts for ii now being aggregated - aggregates.SetIsRoot(i); - numLocalAggregates++; - } - numNonAggregatedNodes--; // accounts for i now being aggregated - break; - } //if ( (ii != i) && (aggStat[ii] != IGNORED ... - } //for (LO ii = 0; ... - } - if (failedToAggregate) { - if (error_on_isolated) { - // Error on this isolated node, as the user has requested - std::ostringstream oss; - oss<<"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). "<GetOStream(Warnings1) << "Found singleton: " << i << std::endl; - numSingletons++; - - aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - numNonAggregatedNodes--; - } + if (j < neighOfINode.size()) { + // Assign to an adjacent aggregate + vertex2AggId[i] = vertex2AggId[neighOfINode[j]]; + numNonAggregatedNodes--; + failedToAggregate = false; } + } + + if (failedToAggregate && makeNonAdjAggs) { + // it we are still didn't find an aggregate home for i (i.e., we have + // a potential singleton), we are desperate. Basically, we seek to + // group i with any other local point to form an aggregate (even if + // it is not a neighbor of i. Either we find a vertex that is already + // aggregated or not aggregated. 
+ // 1) if found vertex is aggregated, then assign i to this aggregate + // 2) if found vertex is not aggregated, create new aggregate + + for (LO ii = 0; ii < numRows; ii++) { // look for anyone else + if ((ii != i) && (aggStat[ii] != IGNORED)) { + failedToAggregate = false; // found someone so start + aggStat[i] = AGGREGATED; // marking i as aggregated + procWinner[i] = myRank; + + if (aggStat[ii] == AGGREGATED) + vertex2AggId[i] = vertex2AggId[ii]; + else { + vertex2AggId[i] = numLocalAggregates; + vertex2AggId[ii] = numLocalAggregates; + aggStat[ii] = AGGREGATED; + procWinner[ii] = myRank; + numNonAggregatedNodes--; // acounts for ii now being aggregated + aggregates.SetIsRoot(i); + numLocalAggregates++; + } + numNonAggregatedNodes--; // accounts for i now being aggregated + break; + } // if ( (ii != i) && (aggStat[ii] != IGNORED ... + } // for (LO ii = 0; ... + } + if (failedToAggregate) { + if (error_on_isolated) { + // Error on this isolated node, as the user has requested + std::ostringstream oss; + oss << "MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). " << std::endl; + oss << "If this error is being generated at level 0, this is due to an initial partitioning problem in your matrix." << std::endl; + oss << "If this error is being generated at any other level, try turning on repartitioning, which may fix this problem." 
<< std::endl; + throw Exceptions::RuntimeError(oss.str()); + } else { + // Create new aggregate (singleton) + // this->GetOStream(Warnings1) << "Found singleton: " << i << std::endl; + numSingletons++; - // One way or another, the node is aggregated (possibly into a singleton) - aggStat [i] = AGGREGATED; - procWinner[i] = myRank; + aggregates.SetIsRoot(i); + vertex2AggId[i] = numLocalAggregates++; + numNonAggregatedNodes--; + } + } - } // loop over numRows - + // One way or another, the node is aggregated (possibly into a singleton) + aggStat[i] = AGGREGATED; + procWinner[i] = myRank; - if(numSingletons > 0) - this->GetOStream(Runtime0)<<" WARNING Rank "< 0) + this->GetOStream(Runtime0) << " WARNING Rank " << myRank << " singletons :" << numSingletons << " (phase)" << std::endl; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE3ALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp index 9911ac016c5e..5a97db952f94 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp @@ -58,70 +58,68 @@ #include "MueLu_LWGraph_kokkos_fwd.hpp" namespace MueLu { - /*! - @class AggregationPhase3Algorithm class. - @brief Handle leftover nodes. Try to avoid singleton nodes - @ingroup Aggregation - - ### Idea ### - In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. - We try to avoid singletons: we first try to build a new aggregate containing - all neighboring non-aggregated nodes. 
If we cannot build a new aggregate, - we add the non-aggregated node to the first adjacent aggregate. - Only if there is no adjacent aggregate, we create a singleton node aggregate. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase3Algorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase3Algorithm class. + @brief Handle leftover nodes. Try to avoid singleton nodes + @ingroup Aggregation + + ### Idea ### + In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. + We try to avoid singletons: we first try to build a new aggregate containing + all neighboring non-aggregated nodes. If we cannot build a new aggregate, + we add the non-aggregated node to the first adjacent aggregate. + Only if there is no adjacent aggregate, we create a singleton node aggregate. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase3Algorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase3Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase3Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase3Algorithm_kokkos() { } + //! Destructor. 
+ virtual ~AggregationPhase3Algorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 3 (cleanup)"; } +}; - std::string description() const { return "Phase 3 (cleanup)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp index 4bacd24017cd..a684149e179f 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp @@ -62,171 +62,171 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. 
Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - // So far we only have the non-deterministic version of the algorithm... - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } - +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm_kokkos:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + // So far we only have the non-deterministic version of the algorithm... + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); } - - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. 
Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - bool error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); - bool makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - Kokkos::View numAggregates("numAggregates"); - Kokkos::deep_copy(numAggregates, aggregates.GetNumAggregates()); - - Kokkos::View aggStatOld(Kokkos::ViewAllocateWithoutInitializing("Initial aggregation status"), aggStat.extent(0)); +} + +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. 
Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm_kokkos:: + BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + bool error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); + bool makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + Kokkos::View numAggregates("numAggregates"); + Kokkos::deep_copy(numAggregates, aggregates.GetNumAggregates()); + + Kokkos::View aggStatOld(Kokkos::ViewAllocateWithoutInitializing("Initial aggregation status"), aggStat.extent(0)); + Kokkos::deep_copy(aggStatOld, aggStat); + Kokkos::View numNonAggregated("numNonAggregated"); + Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes); + for (int color = 1; color < numColors + 1; ++color) { + Kokkos::parallel_for( + "Aggregation Phase 3: aggregates clean-up", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + // Check if node has already been treated? + if ((colors(nodeIdx) != color) || + (aggStatOld(nodeIdx) == AGGREGATED) || + (aggStatOld(nodeIdx) == IGNORED)) { + return; + } + + // Grab node neighbors + auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); + LO neighIdx; + + // We don't want a singleton. + // So lets see if any neighbors can be used to form a new aggregate? 
+ bool isNewAggregate = false; + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + + if ((neighIdx != nodeIdx) && + lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == READY)) { + isNewAggregate = true; + break; + } + } + + // We can form a new non singleton aggregate! + if (isNewAggregate) { + // If this is the aggregate root + // we need to process the nodes in the aggregate + const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = aggId; + // aggregates.SetIsRoot(nodeIdx); + Kokkos::atomic_decrement(&numNonAggregated()); + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + if ((neighIdx != nodeIdx) && + lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == READY)) { + aggStat(neighIdx) = AGGREGATED; + procWinner(neighIdx, 0) = myRank; + vertex2AggId(neighIdx, 0) = aggId; + Kokkos::atomic_decrement(&numNonAggregated()); + } + } + return; + } + + // Getting a little desperate! + // Let us try to aggregate into a neighboring aggregate + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + if (lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == AGGREGATED)) { + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0); + Kokkos::atomic_decrement(&numNonAggregated()); + return; + } + } + + // Getting quite desperate! 
+ // Let us try to make a non contiguous aggregate + if (makeNonAdjAggs) { + for (LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) { + if ((otherNodeIdx != nodeIdx) && + (aggStatOld(otherNodeIdx) == AGGREGATED)) { + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0); + Kokkos::atomic_decrement(&numNonAggregated()); + return; + } + } + } + + // Total deperation! + // Let us make a singleton + if (!error_on_isolated) { + const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = aggId; + Kokkos::atomic_decrement(&numNonAggregated()); + } + }); + // LBV on 09/27/19: here we could copy numNonAggregated to host + // and check for it to be equal to 0 in which case we can stop + // looping over the different colors... Kokkos::deep_copy(aggStatOld, aggStat); - Kokkos::View numNonAggregated("numNonAggregated"); - Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes); - for(int color = 1; color < numColors + 1; ++color) { - Kokkos::parallel_for("Aggregation Phase 3: aggregates clean-up", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO nodeIdx) { - // Check if node has already been treated? - if( (colors(nodeIdx) != color) || - (aggStatOld(nodeIdx) == AGGREGATED) || - (aggStatOld(nodeIdx) == IGNORED) ){ return; } - - // Grab node neighbors - auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); - LO neighIdx; - - // We don't want a singleton. - // So lets see if any neighbors can be used to form a new aggregate? - bool isNewAggregate = false; - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - - if((neighIdx != nodeIdx) && - lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == READY)) { - isNewAggregate = true; - break; - } - } - - // We can form a new non singleton aggregate! 
- if(isNewAggregate) { - // If this is the aggregate root - // we need to process the nodes in the aggregate - const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = aggId; - // aggregates.SetIsRoot(nodeIdx); - Kokkos::atomic_decrement(&numNonAggregated()); - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - if((neighIdx != nodeIdx) && - lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == READY)) { - aggStat(neighIdx) = AGGREGATED; - procWinner(neighIdx, 0) = myRank; - vertex2AggId(neighIdx, 0) = aggId; - Kokkos::atomic_decrement(&numNonAggregated()); - } - } - return; - } - - // Getting a little desperate! - // Let us try to aggregate into a neighboring aggregate - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - if (lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == AGGREGATED)) { - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0); - Kokkos::atomic_decrement(&numNonAggregated()); - return; - } - } - - // Getting quite desperate! - // Let us try to make a non contiguous aggregate - if(makeNonAdjAggs) { - for(LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) { - if((otherNodeIdx != nodeIdx) && - (aggStatOld(otherNodeIdx) == AGGREGATED)) { - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0); - Kokkos::atomic_decrement(&numNonAggregated()); - return; - } - } - } - - // Total deperation! 
- // Let us make a singleton - if(!error_on_isolated) { - const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = aggId; - Kokkos::atomic_decrement(&numNonAggregated()); - } - }); - // LBV on 09/27/19: here we could copy numNonAggregated to host - // and check for it to be equal to 0 in which case we can stop - // looping over the different colors... - Kokkos::deep_copy(aggStatOld, aggStat); - } // loop over colors - - auto numNonAggregated_h = Kokkos::create_mirror_view(numNonAggregated); - Kokkos::deep_copy(numNonAggregated_h, numNonAggregated); - numNonAggregatedNodes = numNonAggregated_h(); - if( (error_on_isolated) && (numNonAggregatedNodes > 0) ) { - // Error on this isolated node, as the user has requested - std::ostringstream oss; - oss<<"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). "< 0)) { + // Error on this isolated node, as the user has requested + std::ostringstream oss; + oss << "MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). " << std::endl; + oss << "If this error is being generated at level 0, this is due to an initial partitioning problem in your matrix." << std::endl; + oss << "If this error is being generated at any other level, try turning on repartitioning, which may fix this problem." 
<< std::endl; + throw Exceptions::RuntimeError(oss.str()); } -} // end namespace + // update aggregate object + auto numAggregates_h = Kokkos::create_mirror_view(numAggregates); + Kokkos::deep_copy(numAggregates_h, numAggregates); + aggregates.SetNumAggregates(numAggregates_h()); +} + +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp index fb3425d5058c..fd383eca5274 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp @@ -63,54 +63,51 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class InterfaceAggregationAlgorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - creates aggregates along an interface using specified root nodes. - - @ingroup Aggregation - - ### Idea ### - The user can mark some nodes as INTERFACE to build aggregates across an interface. - This can be very useful for certain applications. We build aggregates for nodes with - the state INTERFACE. Then, the state is changed to AGGREGATED. - The InterfaceAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - - */ - - template - class InterfaceAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { -#undef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT -#include "MueLu_UseShortNamesOrdinal.hpp" +/*! + @class InterfaceAggregationAlgorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + creates aggregates along an interface using specified root nodes. + + @ingroup Aggregation - public: - //! @name Constructors/Destructors. 
- //@{ + ### Idea ### + The user can mark some nodes as INTERFACE to build aggregates across an interface. + This can be very useful for certain applications. We build aggregates for nodes with + the state INTERFACE. Then, the state is changed to AGGREGATED. + The InterfaceAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - //! Constructor. - InterfaceAggregationAlgorithm(RCP const &graphFact = Teuchos::null); +*/ + +template +class InterfaceAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { +#undef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT +#include "MueLu_UseShortNamesOrdinal.hpp" - //! Destructor. - virtual ~InterfaceAggregationAlgorithm() { } + public: + //! @name Constructors/Destructors. + //@{ - //@} + //! Constructor. + InterfaceAggregationAlgorithm(RCP const& graphFact = Teuchos::null); + //! Destructor. + virtual ~InterfaceAggregationAlgorithm() {} - //! @name Aggregation methods. - //@{ + //@} - /*! @brief Local aggregation. */ + //! @name Aggregation methods. + //@{ - void BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + /*! @brief Local aggregation. 
*/ + void BuildAggregates(Teuchos::ParameterList const& params, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - }; //class InterfaceAggregationAlgorithm +}; // class InterfaceAggregationAlgorithm -} //namespace MueLu +} // namespace MueLu #define MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT #endif /* MUELU_INTERFACEAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp index 22dd58e56fad..0ec5f6842c22 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp @@ -69,40 +69,37 @@ namespace MueLu { template -InterfaceAggregationAlgorithm::InterfaceAggregationAlgorithm(RCP const &/* graphFact */) -{ +InterfaceAggregationAlgorithm::InterfaceAggregationAlgorithm(RCP const& /* graphFact */) { } template -void InterfaceAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & /* params */, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { +void InterfaceAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const& /* params */, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { Monitor m(*this, "BuildAggregates"); const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const int myRank = graph.GetComm()->getRank(); // vertex ids for output Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); // some internal variables - LocalOrdinal numLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on 
current proc + LocalOrdinal numLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc // main loop over all local rows of graph(A) - for(int iNode1 = 0; iNode1 < nRows; ++iNode1) { - + for (int iNode1 = 0; iNode1 < nRows; ++iNode1) { if (aggStat[iNode1] == INTERFACE) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'agg' + aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'agg' int aggIndex = numLocalAggregates; std::vector aggList; aggList.push_back(iNode1); ArrayView neighOfINode = graph.getNeighborVertices(iNode1); - for(int j = 0; j < neighOfINode.size(); ++j) { + for (int j = 0; j < neighOfINode.size(); ++j) { LO neigh = neighOfINode[j]; - if(neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) { - if(aggStat[neigh] != AGGREGATED && aggStat[neigh] != INTERFACE && - aggStat[neigh] != IGNORED) { + if (neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) { + if (aggStat[neigh] != AGGREGATED && aggStat[neigh] != INTERFACE && + aggStat[neigh] != IGNORED) { aggList.push_back(neigh); } } @@ -117,13 +114,12 @@ void InterfaceAggregationAlgorithm::BuildAggr numNonAggregatedNodes -= aggList.size(); } - } // end for + } // end for // update aggregate object aggregates.SetNumAggregates(numLocalAggregates); } -} // end namespace - +} // namespace MueLu #endif /* MUELU_INTERFACEAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp index 47ebb8038952..b89d3e6a83a5 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp @@ -57,61 +57,58 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class IsolatedNodeAggregationAlgorithm class. 
- @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. - - @ingroup Aggregation - - ### Idea ### - The isolated node aggregation algorithm loops over all non-aggregated nodes - (with a state different than aggregated or ignored) which have only themselves - as neighbor node. The state of these "isolated" nodes is then set to ignored such - that they are not considered in the aggregation. This aggregation algorithm should - run as one of the last aggregation algorithms in the aggregation method. - - ### Comments ### - Only nodes with state different than READY or AGGREGATED are changed to IGNORED. - After that, all nodes should have the state AGGREGATED or IGNORED. - - */ - - template - class IsolatedNodeAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class IsolatedNodeAggregationAlgorithm class. + @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. + + @ingroup Aggregation + + ### Idea ### + The isolated node aggregation algorithm loops over all non-aggregated nodes + (with a state different than aggregated or ignored) which have only themselves + as neighbor node. The state of these "isolated" nodes is then set to ignored such + that they are not considered in the aggregation. This aggregation algorithm should + run as one of the last aggregation algorithms in the aggregation method. + + ### Comments ### + Only nodes with state different than READY or AGGREGATED are changed to IGNORED. + After that, all nodes should have the state AGGREGATED or IGNORED. + +*/ + +template +class IsolatedNodeAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. 
- IsolatedNodeAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~IsolatedNodeAggregationAlgorithm() { } + //! Constructor. + IsolatedNodeAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //@} + //! Destructor. + virtual ~IsolatedNodeAggregationAlgorithm() {} + //@} - //! @name Aggregation methods. - //@{ + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. */ - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - std::string description() const { return "Phase - (isolated)"; } + std::string description() const { return "Phase - (isolated)"; } - }; //class MaxLinkAggregationAlgorithm +}; // class MaxLinkAggregationAlgorithm -} //namespace MueLu +} // namespace MueLu #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT - #endif /* MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp index 77147d7990a8..73dd4eddd6c0 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp @@ -53,7 +53,6 @@ #ifndef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ - #include #include @@ -68,20 +67,20 @@ namespace MueLu { - template - void IsolatedNodeAggregationAlgorithm::BuildAggregates(const ParameterList& /* params */, const GraphBase& graph, 
Aggregates& /* aggregates */, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void IsolatedNodeAggregationAlgorithm::BuildAggregates(const ParameterList& /* params */, const GraphBase& graph, Aggregates& /* aggregates */, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - const LO numRows = graph.GetNodeNumVertices(); + const LO numRows = graph.GetNodeNumVertices(); - // Remove all isolated nodes - for (LO i = 0; i < numRows; i++) - if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && graph.getNeighborVertices(i).size() == 1) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; - } - } + // Remove all isolated nodes + for (LO i = 0; i < numRows; i++) + if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && graph.getNeighborVertices(i).size() == 1) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; + } +} -} // end namespace +} // namespace MueLu #endif /* MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp index c5bbb6e9c4b9..afa979fbf9e4 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp @@ -58,65 +58,63 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class IsolatedNodeAggregationAlgorithm class. - @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. - - @ingroup Aggregation - - ### Idea ### - The isolated node aggregation algorithm loops over all non-aggregated nodes - (with a state different than aggregated or ignored) which have only themselves - as neighbor node. 
The state of these "isolated" nodes is then set to ignored such - that they are not considered in the aggregation. This aggregation algorithm should - run as one of the last aggregation algorithms in the aggregation method. - - ### Comments ### - Only nodes with state different than READY or AGGREGATED are changed to IGNORED. - After that, all nodes should have the state AGGREGATED or IGNORED. - - */ - - template - class IsolatedNodeAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class IsolatedNodeAggregationAlgorithm class. + @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. + + @ingroup Aggregation + + ### Idea ### + The isolated node aggregation algorithm loops over all non-aggregated nodes + (with a state different than aggregated or ignored) which have only themselves + as neighbor node. The state of these "isolated" nodes is then set to ignored such + that they are not considered in the aggregation. This aggregation algorithm should + run as one of the last aggregation algorithms in the aggregation method. + + ### Comments ### + Only nodes with state different than READY or AGGREGATED are changed to IGNORED. + After that, all nodes should have the state AGGREGATED or IGNORED. + +*/ + +template +class IsolatedNodeAggregationAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - IsolatedNodeAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + public: + using device_type = typename LWGraph_kokkos::device_type; + using memory_space = typename LWGraph_kokkos::memory_space; + //! @name Constructors/Destructors. 
+ //@{ - //! Destructor. - virtual ~IsolatedNodeAggregationAlgorithm_kokkos() { } + //! Constructor. + IsolatedNodeAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //@} + //! Destructor. + virtual ~IsolatedNodeAggregationAlgorithm_kokkos() {} + //@} - //! @name Aggregation methods. - //@{ + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. */ - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - std::string description() const { return "Phase - (isolated)"; } + std::string description() const { return "Phase - (isolated)"; } - }; //class MaxLinkAggregationAlgorithm +}; // class MaxLinkAggregationAlgorithm -} //namespace MueLu +} // namespace MueLu #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT -#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP +#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp index 96ff102a447a..b37aa733ed03 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp @@ -60,41 +60,40 @@ namespace MueLu { - template - void IsolatedNodeAggregationAlgorithm_kokkos:: - BuildAggregates(const ParameterList& /* params */, - const LWGraph_kokkos& graph, - Aggregates& /* aggregates */, - Kokkos::View& aggstat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, 
"BuildAggregates"); +template +void IsolatedNodeAggregationAlgorithm_kokkos:: + BuildAggregates(const ParameterList& /* params */, + const LWGraph_kokkos& graph, + Aggregates& /* aggregates */, + Kokkos::View& aggstat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - typename Kokkos::View::HostMirror aggstatHost - = Kokkos::create_mirror(aggstat); - Kokkos::deep_copy(aggstatHost, aggstat); - std::vector aggStat; - aggStat.resize(aggstatHost.extent(0)); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggStat[idx] = aggstatHost(idx); - } - - auto lclLWGraph = graph.getLocalLWGraph(); + typename Kokkos::View::HostMirror aggstatHost = Kokkos::create_mirror(aggstat); + Kokkos::deep_copy(aggstatHost, aggstat); + std::vector aggStat; + aggStat.resize(aggstatHost.extent(0)); + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggStat[idx] = aggstatHost(idx); + } - const LO numRows = graph.GetNodeNumVertices(); + auto lclLWGraph = graph.getLocalLWGraph(); - // Remove all isolated nodes - for (LO i = 0; i < numRows; i++) - if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && lclLWGraph.getNeighborVertices(i).length == 1) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; - } + const LO numRows = graph.GetNodeNumVertices(); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggstatHost(idx) = aggStat[idx]; + // Remove all isolated nodes + for (LO i = 0; i < numRows; i++) + if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && lclLWGraph.getNeighborVertices(i).length == 1) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; } - Kokkos::deep_copy(aggstat, aggstatHost); + + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggstatHost(idx) = aggStat[idx]; } + Kokkos::deep_copy(aggstat, aggstatHost); +} -} // end namespace +} // namespace MueLu -#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git 
a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp index 6a77eb0d4a29..c7848f6106fa 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp @@ -63,57 +63,54 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class OnePtAggregationAlgorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - keep special marked nodes as singleton node aggregates over all multigrid levels +/*! + @class OnePtAggregationAlgorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + keep special marked nodes as singleton node aggregates over all multigrid levels - @ingroup Aggregation + @ingroup Aggregation - ### Idea ### - The user can mark some nodes as ONEPT to build some single node aggregates. - This can be very useful for certain applications. We build single node aggregates - for nodes with the state ONEPT. Then, the state is changed to ignored. - The OnePtAggregationAlgorithm should run before the Phase1AggregationAlgorithm. + ### Idea ### + The user can mark some nodes as ONEPT to build some single node aggregates. + This can be very useful for certain applications. We build single node aggregates + for nodes with the state ONEPT. Then, the state is changed to ignored. + The OnePtAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - ### Comments ### - Only nodes with state ONEPT are changed to IGNORED. + ### Comments ### + Only nodes with state ONEPT are changed to IGNORED. 
- */ +*/ - template - class OnePtAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class OnePtAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_ONEPTAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - OnePtAggregationAlgorithm(RCP const &graphFact = Teuchos::null); + //! Constructor. + OnePtAggregationAlgorithm(RCP const& graphFact = Teuchos::null); - //! Destructor. - virtual ~OnePtAggregationAlgorithm() { } + //! Destructor. + virtual ~OnePtAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(Teuchos::ParameterList const& params, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} +}; // class OnePtAggregationAlgorithm - - }; //class OnePtAggregationAlgorithm - -} //namespace MueLu +} // namespace MueLu #define MUELU_ONEPTAGGREGATIONALGORITHM_SHORT #endif /* MUELU_ONEPTAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp index 687778c05654..b85b3cc3a927 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp @@ -69,51 +69,47 @@ namespace MueLu { template -OnePtAggregationAlgorithm::OnePtAggregationAlgorithm(RCP const &/* graphFact */) -{ 
+OnePtAggregationAlgorithm::OnePtAggregationAlgorithm(RCP const& /* graphFact */) { } template -void OnePtAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & /* params */, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { +void OnePtAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const& /* params */, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { Monitor m(*this, "BuildAggregates"); const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const int myRank = graph.GetComm()->getRank(); // vertex ids for output Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); // some internal variables - LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc - LocalOrdinal iNode1 = 0; // current node + LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc + LocalOrdinal iNode1 = 0; // current node // main loop over all local rows of graph(A) while (iNode1 < nRows) { - if (aggStat[iNode1] == ONEPT) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' + aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' std::vector aggList; aggList.push_back(iNode1); int aggIndex = nLocalAggregates++; for (size_t k = 0; k < aggList.size(); k++) { - aggStat[aggList[k]] = IGNORED; + aggStat[aggList[k]] = IGNORED; vertex2AggId[aggList[k]] = aggIndex; - procWinner[aggList[k]] = myRank; + procWinner[aggList[k]] = myRank; } numNonAggregatedNodes -= aggList.size(); } iNode1++; - } // end while + } // end while // update aggregate object aggregates.SetNumAggregates(nLocalAggregates); } -} // end namespace - +} // namespace MueLu #endif /* 
MUELU_ONEPTAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp index 1cae818205c2..986c2be498ca 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp @@ -58,63 +58,60 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class OnePtAggregationAlgorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - keep special marked nodes as singleton node aggregates over all multigrid levels +/*! + @class OnePtAggregationAlgorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + keep special marked nodes as singleton node aggregates over all multigrid levels - @ingroup Aggregation + @ingroup Aggregation - ### Idea ### - The user can mark some nodes as ONEPT to build some single node aggregates. - This can be very useful for certain applications. We build single node aggregates - for nodes with the state ONEPT. Then, the state is changed to ignored. - The OnePtAggregationAlgorithm should run before the Phase1AggregationAlgorithm. + ### Idea ### + The user can mark some nodes as ONEPT to build some single node aggregates. + This can be very useful for certain applications. We build single node aggregates + for nodes with the state ONEPT. Then, the state is changed to ignored. + The OnePtAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - ### Comments ### - Only nodes with state ONEPT are changed to IGNORED. + ### Comments ### + Only nodes with state ONEPT are changed to IGNORED. 
- */ +*/ - template - class OnePtAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class OnePtAggregationAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + public: + using device_type = typename LWGraph_kokkos::device_type; + using memory_space = typename LWGraph_kokkos::memory_space; + //! @name Constructors/Destructors. + //@{ - //! Constructor. - OnePtAggregationAlgorithm_kokkos(RCP const &graphFact = Teuchos::null); + //! Constructor. + OnePtAggregationAlgorithm_kokkos(RCP const& graphFact = Teuchos::null); - //! Destructor. - virtual ~OnePtAggregationAlgorithm_kokkos() { } + //! Destructor. + virtual ~OnePtAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(Teuchos::ParameterList const& params, + LWGraph_kokkos const& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(Teuchos::ParameterList const & params, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} +}; // class OnePtAggregationAlgorithm_kokkos - - }; //class OnePtAggregationAlgorithm_kokkos - -} //namespace MueLu +} // namespace MueLu #define MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT -#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp index 82dd4881b96c..f016c974ffe4 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp @@ -60,71 +60,67 @@ namespace MueLu { - template - OnePtAggregationAlgorithm_kokkos::OnePtAggregationAlgorithm_kokkos(RCP const &/* graphFact */) - { +template +OnePtAggregationAlgorithm_kokkos::OnePtAggregationAlgorithm_kokkos(RCP const& /* graphFact */) { +} + +template +void OnePtAggregationAlgorithm_kokkos:: + BuildAggregates(Teuchos::ParameterList const& /* params */, + LWGraph_kokkos const& graph, + Aggregates& aggregates, + Kokkos::View& aggstat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + typename Kokkos::View::HostMirror aggstatHost = Kokkos::create_mirror(aggstat); + Kokkos::deep_copy(aggstatHost, aggstat); + std::vector aggStat; + aggStat.resize(aggstatHost.extent(0)); + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggStat[idx] = aggstatHost(idx); } - template - void 
OnePtAggregationAlgorithm_kokkos:: - BuildAggregates(Teuchos::ParameterList const & /* params */, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggstat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - typename Kokkos::View::HostMirror aggstatHost - = Kokkos::create_mirror(aggstat); - Kokkos::deep_copy(aggstatHost, aggstat); - std::vector aggStat; - aggStat.resize(aggstatHost.extent(0)); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggStat[idx] = aggstatHost(idx); - } - - const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - // vertex ids for output - Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - - // some internal variables - LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc - LocalOrdinal iNode1 = 0; // current node - - // main loop over all local rows of graph(A) - while (iNode1 < nRows) { - - if (aggStat[iNode1] == ONEPT) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' - std::vector aggList; - aggList.push_back(iNode1); - int aggIndex = nLocalAggregates++; - - // finalize aggregate - for (size_t k = 0; k < aggList.size(); k++) { - aggStat[aggList[k]] = IGNORED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner[aggList[k]] = myRank; - } - numNonAggregatedNodes -= aggList.size(); + const LocalOrdinal nRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + // vertex ids for output + Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + // some internal variables + LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc + 
LocalOrdinal iNode1 = 0; // current node + + // main loop over all local rows of graph(A) + while (iNode1 < nRows) { + if (aggStat[iNode1] == ONEPT) { + aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' + std::vector aggList; + aggList.push_back(iNode1); + int aggIndex = nLocalAggregates++; + + // finalize aggregate + for (size_t k = 0; k < aggList.size(); k++) { + aggStat[aggList[k]] = IGNORED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; } - - iNode1++; - } // end while - - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggstatHost(idx) = aggStat[idx]; + numNonAggregatedNodes -= aggList.size(); } - Kokkos::deep_copy(aggstat, aggstatHost); - // update aggregate object - aggregates.SetNumAggregates(nLocalAggregates); + iNode1++; + } // end while + + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggstatHost(idx) = aggStat[idx]; } + Kokkos::deep_copy(aggstat, aggstatHost); + + // update aggregate object + aggregates.SetNumAggregates(nLocalAggregates); +} -} // end namespace +} // namespace MueLu -#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp index 8d1af9929a88..b53e449df3b0 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp @@ -55,67 +55,63 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class PreserveDirichletAggregationAlgorithm class. - @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might - be necessary. 
(default = off) - - @ingroup Aggregation - - ### Idea ### - Handles Dirichlet boundary nodes with the state Boundary. - Depending on the boolean parameter "aggregation: preserve Dirichlet points" one-to-one aggregates - with singleton nodes are built for all Dirichlet boundary nodes or the aggregates are just - ignored (default behavior). The state of all boundary nodes (state = Boundary) - is set to ignored. That means, that these nodes are not considered for further - aggregation in the later aggregation phases. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: preserve Dirichlet points | Boolean parameter stating whether Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: false). - - ### Comments ### - Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. - */ - - template - class PreserveDirichletAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class PreserveDirichletAggregationAlgorithm class. + @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might + be necessary. (default = off) + + @ingroup Aggregation + + ### Idea ### + Handles Dirichlet boundary nodes with the state Boundary. + Depending on the boolean parameter "aggregation: preserve Dirichlet points" one-to-one aggregates + with singleton nodes are built for all Dirichlet boundary nodes or the aggregates are just + ignored (default behavior). The state of all boundary nodes (state = Boundary) + is set to ignored. That means, that these nodes are not considered for further + aggregation in the later aggregation phases. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: preserve Dirichlet points | Boolean parameter stating whether Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: false). + + ### Comments ### + Only nodes with state BOUNDARY are changed to IGNORED. 
No other nodes are touched. +*/ + +template +class PreserveDirichletAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - PreserveDirichletAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + PreserveDirichletAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~PreserveDirichletAggregationAlgorithm() { } + //! Destructor. + virtual ~PreserveDirichletAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (Dirichlet)"; } - std::string description() const { return "Phase - (Dirichlet)"; } +}; // class PreserveDirichletAggregationAlgorithm - }; //class PreserveDirichletAggregationAlgorithm - -} //namespace MueLu +} // namespace MueLu #define MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT - - #endif /* MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp index 5dbd6d0dbf7f..9362342deaba 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp +++ 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp @@ -60,37 +60,37 @@ namespace MueLu { - template - void PreserveDirichletAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void PreserveDirichletAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const& params, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - bool preserve = params.get("aggregation: preserve Dirichlet points"); + bool preserve = params.get("aggregation: preserve Dirichlet points"); - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - LO numLocalAggregates = aggregates.GetNumAggregates(); + LO numLocalAggregates = aggregates.GetNumAggregates(); - for (LO i = 0; i < numRows; i++) - if (aggStat[i] == BOUNDARY) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; + for (LO i = 0; i < numRows; i++) + if (aggStat[i] == BOUNDARY) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; - if (preserve) { - aggregates.SetIsRoot(i); + if (preserve) { + aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - procWinner [i] = myRank; - } + vertex2AggId[i] = numLocalAggregates++; + procWinner[i] = myRank; } + } - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + 
aggregates.SetNumAggregates(numLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif /* MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp index 568889a49cb2..1b5b5697c09c 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp @@ -58,72 +58,70 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class PreserveDirichletAggregationAlgorithm class. - @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might - be necessary. (default = off) - - @ingroup Aggregation - - ### Idea ### - Handles Dirichlet boundary nodes with the state Boundary. - Depending on the boolean parameter "aggregation: preserve Dirichlet points" one-to-one aggregates - with singleton nodes are built for all Dirichlet boundary nodes or the aggregates are just - ignored (default behavior). The state of all boundary nodes (state = Boundary) - is set to ignored. That means, that these nodes are not considered for further - aggregation in the later aggregation phases. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: preserve Dirichlet points | Boolean parameter stating whether Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: false). - - ### Comments ### - Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. - */ - - template - class PreserveDirichletAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class PreserveDirichletAggregationAlgorithm class. 
+ @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might + be necessary. (default = off) + + @ingroup Aggregation + + ### Idea ### + Handles Dirichlet boundary nodes with the state Boundary. + Depending on the boolean parameter "aggregation: preserve Dirichlet points" one-to-one aggregates + with singleton nodes are built for all Dirichlet boundary nodes or the aggregates are just + ignored (default behavior). The state of all boundary nodes (state = Boundary) + is set to ignored. That means, that these nodes are not considered for further + aggregation in the later aggregation phases. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: preserve Dirichlet points | Boolean parameter stating whether Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: false). + + ### Comments ### + Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. +*/ + +template +class PreserveDirichletAggregationAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - PreserveDirichletAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + PreserveDirichletAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. 
- virtual ~PreserveDirichletAggregationAlgorithm_kokkos() { } + //! Destructor. + virtual ~PreserveDirichletAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (Dirichlet)"; } - std::string description() const { return "Phase - (Dirichlet)"; } +}; // class PreserveDirichletAggregationAlgorithm - }; //class PreserveDirichletAggregationAlgorithm - -} //namespace MueLu +} // namespace MueLu #define MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT -#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp index 498640df9c43..b28171a695a3 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp @@ -60,61 +60,61 @@ namespace MueLu { - template - void PreserveDirichletAggregationAlgorithm_kokkos:: - BuildAggregates(Teuchos::ParameterList const & params, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - using 
local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; +template +void PreserveDirichletAggregationAlgorithm_kokkos:: + BuildAggregates(Teuchos::ParameterList const& params, + LWGraph_kokkos const& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; - // Extract parameters and data from: - // 1) the parameter list - const bool preserve = params.get("aggregation: preserve Dirichlet points"); + // Extract parameters and data from: + // 1) the parameter list + const bool preserve = params.get("aggregation: preserve Dirichlet points"); - // 2) the amalgamated graph - const LO numNodes = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + // 2) the amalgamated graph + const LO numNodes = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - // 3) the aggregates - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); + // 3) the aggregates + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); - // A view is needed to count on the fly the current number of local aggregates - Kokkos::View aggCount("aggCount"); - if(preserve) { - Kokkos::deep_copy(aggCount, aggregates.GetNumAggregates()); - } - Kokkos::parallel_for("MueLu - PreserveDirichlet: tagging ignored nodes", - Kokkos::RangePolicy(0, numNodes), - KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { - if (aggStat(nodeIdx) == BOUNDARY) { - aggStat(nodeIdx) = IGNORED; - const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); + // A view is needed to count on the fly the current number of local aggregates + Kokkos::View 
aggCount("aggCount"); + if (preserve) { + Kokkos::deep_copy(aggCount, aggregates.GetNumAggregates()); + } + Kokkos::parallel_for( + "MueLu - PreserveDirichlet: tagging ignored nodes", + Kokkos::RangePolicy(0, numNodes), + KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { + if (aggStat(nodeIdx) == BOUNDARY) { + aggStat(nodeIdx) = IGNORED; + const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); - if (preserve) { - // aggregates.SetIsRoot(nodeIdx); + if (preserve) { + // aggregates.SetIsRoot(nodeIdx); - vertex2AggId(nodeIdx, 0) = aggIdx; - procWinner(nodeIdx, 0) = myRank; - } - } - }); - typename Kokkos::View::HostMirror aggCount_h - = Kokkos::create_mirror_view(aggCount); - Kokkos::deep_copy(aggCount_h, aggCount); - // In this phase the number of new aggregates is the same - // as the number of newly aggregated nodes. - numNonAggregatedNodes -= (aggCount_h() - aggregates.GetNumAggregates()); + vertex2AggId(nodeIdx, 0) = aggIdx; + procWinner(nodeIdx, 0) = myRank; + } + } + }); + typename Kokkos::View::HostMirror aggCount_h = Kokkos::create_mirror_view(aggCount); + Kokkos::deep_copy(aggCount_h, aggCount); + // In this phase the number of new aggregates is the same + // as the number of newly aggregated nodes. 
+ numNonAggregatedNodes -= (aggCount_h() - aggregates.GetNumAggregates()); - // update aggregate object - if(preserve) { - aggregates.SetNumAggregates(aggCount_h()); - } + // update aggregate object + if (preserve) { + aggregates.SetNumAggregates(aggCount_h()); } +} -} // end namespace +} // namespace MueLu -#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp index 21dd4ab2a9bb..daee942895a6 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ - #include #include @@ -137,14 +136,14 @@ namespace MueLu { | Aggregates | UncoupledAggregationFactory | Container class with aggregation information. See also Aggregates. */ -template +template class UncoupledAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" -public: + public: //! @name Constructors/Destructors. //@{ @@ -152,7 +151,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { UncoupledAggregationFactory(); //! Destructor. 
- virtual ~UncoupledAggregationFactory() { } + virtual ~UncoupledAggregationFactory() {} RCP GetValidParameterList() const; @@ -169,16 +168,16 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { } // deprecated void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { - SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate + SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate } // deprecated void SetMinNodesPerAggregate(int minNodesPerAggregate) { - SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate + SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate } // set information about 1-node aggregates (map name and generating factory) void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); } // deprecated @@ -202,7 +201,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //! Input //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; //@} @@ -210,7 +209,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //@{ /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + void Build(Level& currentLevel) const; //@} @@ -218,14 +217,13 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //@{ /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ - //void Append(const RCP > & alg); + // void Append(const RCP > & alg); /*! 
@brief Remove all aggregation algorithms from list */ - //void ClearAggregationAlgorithms() { algos_.clear(); } + // void ClearAggregationAlgorithms() { algos_.clear(); } //@} -private: - + private: //! aggregation algorithms // will be filled in Build routine mutable std::vector > > algos_; @@ -235,9 +233,9 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //! if false, no change in aggregation algorithms is possible any more mutable bool bDefinitionPhase_; -}; // class UncoupledAggregationFactory +}; // class UncoupledAggregationFactory -} +} // namespace MueLu #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp index d93dc6dbcdb9..9c73ad85fe25 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp @@ -72,239 +72,233 @@ namespace MueLu { - template - UncoupledAggregationFactory::UncoupledAggregationFactory() - : bDefinitionPhase_(true) - { } +template +UncoupledAggregationFactory::UncoupledAggregationFactory() + : bDefinitionPhase_(true) {} - template - RCP UncoupledAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP UncoupledAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // Aggregation parameters (used in aggregation algorithms) - // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters + // Aggregation parameters (used in aggregation algorithms) + // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define 
its own parameters - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: match ML phase1"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: match ML phase2b"); - SET_VALID_ENTRY("aggregation: phase2a agg factor"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: use interface aggregation"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); - SET_VALID_ENTRY("aggregation: phase 1 algorithm"); -#undef SET_VALID_ENTRY - - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set< RCP >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); - - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set< std::string > ("OnePt aggregate 
map name", "", "Name of input map for single node aggregates. (default='')"); - validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); - //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); - - // InterfaceAggregation parameters - //validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds."); - validParamList->set< std::string > ("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')"); - validParamList->set< std::string > ("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates."); - validParamList->set > ("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0)."); - - return validParamList; - } - - template - void UncoupledAggregationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - const ParameterList& pL = GetParameterList(); - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering").setValidator(rcp(new 
validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: match ML phase1"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: match ML phase2b"); + SET_VALID_ENTRY("aggregation: phase2a agg factor"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: use interface aggregation"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + SET_VALID_ENTRY("aggregation: compute aggregate qualities"); + SET_VALID_ENTRY("aggregation: phase 1 algorithm"); +#undef SET_VALID_ENTRY + + // general variables needed in AggregationFactory + validParamList->set>("Graph", null, "Generating factory of the graph"); + validParamList->set>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + validParamList->set>("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set("OnePt aggregate map name", "", "Name of input map for single node aggregates. 
(default='')"); + validParamList->set("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); + // validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); + + // InterfaceAggregation parameters + // validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds."); + validParamList->set("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')"); + validParamList->set("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates."); + validParamList->set>("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0)."); + + return validParamList; +} + +template +void UncoupledAggregationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + const ParameterList& pL = GetParameterList(); + + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } + } - // request special data necessary for InterfaceAggregation - if (pL.get("aggregation: use interface aggregation") == true){ - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { - currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("nodeOnInterface", 
NoFactory::get()), - Exceptions::RuntimeError, - "nodeOnInterface was not provided by the user on level0!"); - } + // request special data necessary for InterfaceAggregation + if (pL.get("aggregation: use interface aggregation") == true) { + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { + currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); } else { - Input(currentLevel, "nodeOnInterface"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), + Exceptions::RuntimeError, + "nodeOnInterface was not provided by the user on level0!"); } - } - - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); + } else { + Input(currentLevel, "nodeOnInterface"); } } - template - void UncoupledAggregationFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // TODO Can we keep different aggregation algorithms over more Build calls? 
- algos_.clear(); - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); - if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); - - // TODO: remove old aggregation mode - //if (pL.get("UseOnePtAggregationAlgorithm") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - //if (pL.get("UseUncoupledAggregationAlgorithm") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - //if (pL.get("UseMaxLinkAggregationAlgorithm") == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); - //if (pL.get("UseEmergencyAggregationAlgorithm") == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); - - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } + if (pL.get("aggregation: compute aggregate qualities")) { + Input(currentLevel, "AggregateQualities"); + } +} + +template +void 
UncoupledAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // TODO Can we keep different aggregation algorithms over more Build calls? + algos_.clear(); + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact))); + + // TODO: remove old aggregation mode + // if (pL.get("UseOnePtAggregationAlgorithm") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); + // if (pL.get("UseUncoupledAggregationAlgorithm") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); + // if (pL.get("UseMaxLinkAggregationAlgorithm") == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); + // if (pL.get("UseEmergencyAggregationAlgorithm") == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); + + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP 
OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get>(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get>(mapOnePtName, mapOnePtFact.get()); } + } - // Set map for interface aggregates - std::string mapInterfaceName = pL.get("Interface aggregate map name"); - RCP InterfaceMap = Teuchos::null; + // Set map for interface aggregates + std::string mapInterfaceName = pL.get("Interface aggregate map name"); + RCP InterfaceMap = Teuchos::null; - RCP graph = Get< RCP >(currentLevel, "Graph"); + RCP graph = Get>(currentLevel, "Graph"); - // Build - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("UC"); + // Build + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("UC"); - const LO numRows = graph->GetNodeNumVertices(); + const LO numRows = graph->GetNodeNumVertices(); - // construct aggStat information - std::vector aggStat(numRows, READY); + // construct aggStat information + std::vector aggStat(numRows, READY); - // interface - if (pL.get("aggregation: use interface aggregation") == true){ - Teuchos::Array nodeOnInterface = Get>(currentLevel,"nodeOnInterface"); - for (LO i = 0; i < numRows; i++) { - if (nodeOnInterface[i]) - aggStat[i] = INTERFACE; - } + // interface + if (pL.get("aggregation: use interface aggregation") == true) { + Teuchos::Array nodeOnInterface = Get>(currentLevel, "nodeOnInterface"); + for (LO i = 0; i < numRows; i++) { + if (nodeOnInterface[i]) + aggStat[i] = INTERFACE; } + } - ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); - if (dirichletBoundaryMap != Teuchos::null) - for (LO i = 0; i < numRows; i++) - if (dirichletBoundaryMap[i] == true) - aggStat[i] = BOUNDARY; - - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = 
graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStat[i] = ONEPT; - } + ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); + if (dirichletBoundaryMap != Teuchos::null) + for (LO i = 0; i < numRows; i++) + if (dirichletBoundaryMap[i] == true) + aggStat[i] = BOUNDARY; + + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase; + + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStat[i] = ONEPT; } + } - - - const RCP > comm = graph->GetComm(); - GO numGlobalRows = 0; - if (IsPrint(Statistics1)) - MueLu_sumAll(comm, as(numRows), numGlobalRows); - - LO numNonAggregatedNodes = numRows; - GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel); - - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - - if (IsPrint(Statistics1)) { - GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; - GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; - MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); - MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); - - double 
aggPercent = 100*as(numGlobalAggregated)/as(numGlobalRows); - if (aggPercent > 99.99 && aggPercent < 100.00) { - // Due to round off (for instance, for 140465733/140466897), we could - // get 100.00% display even if there are some remaining nodes. This - // is bad from the users point of view. It is much better to change - // it to display 99.99%. - aggPercent = 99.99; - } - GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed - << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" - << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" - << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; - numGlobalAggregatedPrev = numGlobalAggregated; - numGlobalAggsPrev = numGlobalAggs; + const RCP> comm = graph->GetComm(); + GO numGlobalRows = 0; + if (IsPrint(Statistics1)) + MueLu_sumAll(comm, as(numRows), numGlobalRows); + + LO numNonAggregatedNodes = numRows; + GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel); + + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + + if (IsPrint(Statistics1)) { + GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; + GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; + MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); + MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); + + double aggPercent = 100 * as(numGlobalAggregated) / as(numGlobalRows); + if (aggPercent > 99.99 && aggPercent < 100.00) { + // Due to round off (for instance, for 140465733/140466897), we could + // get 
100.00% display even if there are some remaining nodes. This + // is bad from the users point of view. It is much better to change + // it to display 99.99%. + aggPercent = 99.99; } + GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed + << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" + << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" + << " aggregates : " << numGlobalAggs - numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; + numGlobalAggregatedPrev = numGlobalAggregated; + numGlobalAggsPrev = numGlobalAggs; } + } - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); - Set(currentLevel, "Aggregates", aggregates); - - if (pL.get("aggregation: compute aggregate qualities")) { - RCP> aggQualities = Get>>(currentLevel, "AggregateQualities"); - } + Set(currentLevel, "Aggregates", aggregates); + if (pL.get("aggregation: compute aggregate qualities")) { + RCP> aggQualities = Get>>(currentLevel, "AggregateQualities"); } +} -} //namespace MueLu - +} // namespace MueLu #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp index 5bf4100d95bb..4c8e70426c4d 100644 --- 
a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp @@ -133,107 +133,106 @@ namespace MueLu { | Aggregates | UncoupledAggregationFactory | Container class with aggregation information. See also Aggregates. */ - template - class UncoupledAggregationFactory_kokkos : public SingleLevelFactoryBase { +template +class UncoupledAggregationFactory_kokkos : public SingleLevelFactoryBase { #undef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - UncoupledAggregationFactory_kokkos(); + //! Constructor. + UncoupledAggregationFactory_kokkos(); - //! Destructor. - virtual ~UncoupledAggregationFactory_kokkos() { } + //! Destructor. + virtual ~UncoupledAggregationFactory_kokkos() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Set/get methods. - //@{ + //! @name Set/get methods. 
+ //@{ - // Options shared by all aggregation algorithms + // Options shared by all aggregation algorithms - // deprecated - void SetOrdering(const std::string& ordering) { - SetParameter("aggregation: ordering", ParameterEntry(ordering)); - } - // deprecated - void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { - SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate - } - // deprecated - void SetMinNodesPerAggregate(int minNodesPerAggregate) { - SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate - } - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + // deprecated + void SetOrdering(const std::string& ordering) { + SetParameter("aggregation: ordering", ParameterEntry(ordering)); + } + // deprecated + void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { + SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate + } + // deprecated + void SetMinNodesPerAggregate(int minNodesPerAggregate) { + SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate + } + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - // deprecated - const std::string& GetOrdering() const { - const ParameterList& pL = GetParameterList(); - return pL.get("aggregation: ordering"); - } - // deprecated - int GetMaxNeighAlreadySelected() const { - const 
ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: max selected neighbors")); - } - // deprecated - int GetMinNodesPerAggregate() const { - const ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: min agg size")); - } + // deprecated + const std::string& GetOrdering() const { + const ParameterList& pL = GetParameterList(); + return pL.get("aggregation: ordering"); + } + // deprecated + int GetMaxNeighAlreadySelected() const { + const ParameterList& pL = GetParameterList(); + return Teuchos::as(pL.get("aggregation: max selected neighbors")); + } + // deprecated + int GetMinNodesPerAggregate() const { + const ParameterList& pL = GetParameterList(); + return Teuchos::as(pL.get("aggregation: min agg size")); + } - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + //@} - //! @name Definition methods - //@{ + //! @name Definition methods + //@{ - /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ - //void Append(const RCP > & alg); + /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ + // void Append(const RCP > & alg); - /*! @brief Remove all aggregation algorithms from list */ - //void ClearAggregationAlgorithms() { algos_.clear(); } - //@} + /*! @brief Remove all aggregation algorithms from list */ + // void ClearAggregationAlgorithms() { algos_.clear(); } + //@} - private: + private: + //! aggregation algorithms + // will be filled in Build routine + mutable std::vector > > algos_; - //! aggregation algorithms - // will be filled in Build routine - mutable std::vector > > algos_; + //! boolean flag: definition phase + //! 
if true, the aggregation algorithms still can be set and changed. + //! if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; +}; // class UncoupledAggregationFactory_kokkos - }; // class UncoupledAggregationFactory_kokkos - -} +} // namespace MueLu #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT -#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP +#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp index 0c0daed773ac..ba7f35169768 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp @@ -75,338 +75,328 @@ namespace MueLu { - template - UncoupledAggregationFactory_kokkos::UncoupledAggregationFactory_kokkos() - : bDefinitionPhase_(true) - { } +template +UncoupledAggregationFactory_kokkos::UncoupledAggregationFactory_kokkos() + : bDefinitionPhase_(true) {} - template - RCP UncoupledAggregationFactory_kokkos::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP UncoupledAggregationFactory_kokkos::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // Aggregation parameters (used in aggregation algorithms) - // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters + // Aggregation parameters (used in aggregation algorithms) + // TODO introduce local member function for each 
aggregation algorithm such that each aggregation algorithm can define its own parameters - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: deterministic"); - SET_VALID_ENTRY("aggregation: coloring algorithm"); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: match ML phase1"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: match ML phase2b"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: phase 1 algorithm"); -#undef SET_VALID_ENTRY - - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. 
(default='')"); - validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); - //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); - - return validParamList; - } - - template - void UncoupledAggregationFactory_kokkos::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - const ParameterList& pL = GetParameterList(); - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering").setValidator(rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: deterministic"); + SET_VALID_ENTRY("aggregation: coloring algorithm"); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: match ML phase1"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: match ML phase2b"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); 
+ SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: phase 1 algorithm"); +#undef SET_VALID_ENTRY + + // general variables needed in AggregationFactory + validParamList->set >("Graph", null, "Generating factory of the graph"); + validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')"); + validParamList->set("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); + // validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); + + return validParamList; +} + +template +void UncoupledAggregationFactory_kokkos::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + const ParameterList& pL = GetParameterList(); + + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } } - - template - void UncoupledAggregationFactory_kokkos:: - Build(Level ¤tLevel) const { - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; - using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; - FactoryMonitor m(*this, "Build", 
currentLevel); - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // TODO Can we keep different aggregation algorithms over more Build calls? - algos_.clear(); - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact))); - - // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 or Phase 2b, but is in 2a - TEUCHOS_TEST_FOR_EXCEPTION( pL.get("aggregation: match ML phase1"),std::invalid_argument,"Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation"); - TEUCHOS_TEST_FOR_EXCEPTION( pL.get("aggregation: match ML phase2b"),std::invalid_argument,"Option: 'aggregation: match ML phase2b' is not supported in the Kokkos version of uncoupled aggregation"); - - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = 
currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } +} + +template +void UncoupledAggregationFactory_kokkos:: + Build(Level& currentLevel) const { + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; + using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; + FactoryMonitor m(*this, "Build", currentLevel); + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // TODO Can we keep different aggregation algorithms over more Build calls? + algos_.clear(); + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos(graphFact))); + + // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 or Phase 2b, but is in 2a + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: match ML phase1"), std::invalid_argument, "Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of 
uncoupled aggregation"); + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: match ML phase2b"), std::invalid_argument, "Option: 'aggregation: match ML phase2b' is not supported in the Kokkos version of uncoupled aggregation"); + + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); } + } - RCP graph = Get< RCP >(currentLevel, "Graph"); + RCP graph = Get >(currentLevel, "Graph"); + + // Build + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("UC"); + + const LO numRows = graph->GetNodeNumVertices(); + + // construct aggStat information + Kokkos::View aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), + numRows); + Kokkos::deep_copy(aggStat, READY); + + // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map + // even if the map is correctly extracted from the graph, aggStat is + // now a Kokkos::View and filling it will + // require a parallel_for or to copy it to the Host which is not really + // good from a performance point of view. + // If dirichletBoundaryMap was an actual Xpetra::Map, one could call + // getLocalMap to have a Kokkos::View on the appropriate memory_space + // instead of an ArrayRCP. 
+ { + typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap(); + Kokkos::parallel_for( + "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { + if (dirichletBoundaryMap(nodeIdx) == true) { + aggStat(nodeIdx) = BOUNDARY; + } + }); + } - // Build - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("UC"); + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); - const LO numRows = graph->GetNodeNumVertices(); + /* FIXME: This chunk of code is still executing on the host */ + if (OnePtMap != Teuchos::null) { + typename Kokkos::View::HostMirror aggStatHost = Kokkos::create_mirror_view(aggStat); + Kokkos::deep_copy(aggStatHost, aggStat); - // construct aggStat information - Kokkos::View aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), - numRows); - Kokkos::deep_copy(aggStat, READY); + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase; - // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map - // even if the map is correctly extracted from the graph, aggStat is - // now a Kokkos::View and filling it will - // require a parallel_for or to copy it to the Host which is not really - // good from a performance point of view. - // If dirichletBoundaryMap was an actual Xpetra::Map, one could call - // getLocalMap to have a Kokkos::View on the appropriate memory_space - // instead of an ArrayRCP. 
- { - typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap(); - Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { - if (dirichletBoundaryMap(nodeIdx) == true) { - aggStat(nodeIdx) = BOUNDARY; - } - }); + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStatHost(i) = ONEPT; } - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - - /* FIXME: This chunk of code is still executing on the host */ - if (OnePtMap != Teuchos::null) { - typename Kokkos::View::HostMirror aggStatHost - = Kokkos::create_mirror_view(aggStat); - Kokkos::deep_copy(aggStatHost, aggStat); - - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStatHost(i) = ONEPT; - } + Kokkos::deep_copy(aggStat, aggStatHost); + } - Kokkos::deep_copy(aggStat, aggStatHost); + const RCP > comm = graph->GetComm(); + GO numGlobalRows = 0; + if (IsPrint(Statistics1)) + MueLu_sumAll(comm, as(numRows), numGlobalRows); + + LO numNonAggregatedNodes = numRows; + std::string aggAlgo = pL.get("aggregation: coloring algorithm"); + if (aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") { + SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel); + using graph_t = typename LWGraph_kokkos::local_graph_type; + using device_t = typename graph_t::device_type; + using exec_space = typename device_t::execution_space; + using rowmap_t = typename graph_t::row_map_type; + using colinds_t = typename graph_t::entries_type; + using lno_t = typename colinds_t::non_const_value_type; + rowmap_t aRowptrs = 
graph->getLocalLWGraph().getRowPtrs(); + colinds_t aColinds = graph->getLocalLWGraph().getEntries(); + lno_t numAggs = 0; + typename colinds_t::non_const_type labels; + + if (aggAlgo == "mis2 coarsening") { + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl; + labels = KokkosGraph::graph_mis2_coarsen(aRowptrs, aColinds, numAggs); + } else if (aggAlgo == "mis2 aggregation") { + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl; + labels = KokkosGraph::graph_mis2_aggregate(aRowptrs, aColinds, numAggs); } - - const RCP > comm = graph->GetComm(); - GO numGlobalRows = 0; - if (IsPrint(Statistics1)) - MueLu_sumAll(comm, as(numRows), numGlobalRows); - - LO numNonAggregatedNodes = numRows; - std::string aggAlgo = pL.get("aggregation: coloring algorithm"); - if(aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") - { - SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel); - using graph_t = typename LWGraph_kokkos::local_graph_type; - using device_t = typename graph_t::device_type; - using exec_space = typename device_t::execution_space; - using rowmap_t = typename graph_t::row_map_type; - using colinds_t = typename graph_t::entries_type; - using lno_t = typename colinds_t::non_const_value_type; - rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs(); - colinds_t aColinds = graph->getLocalLWGraph().getEntries(); - lno_t numAggs = 0; - typename colinds_t::non_const_type labels; - - if(aggAlgo == "mis2 coarsening") - { - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl; - labels = KokkosGraph::graph_mis2_coarsen(aRowptrs, aColinds, numAggs); - } - else if(aggAlgo == "mis2 aggregation") - { - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl; - labels = KokkosGraph::graph_mis2_aggregate(aRowptrs, aColinds, numAggs); - } - auto vertex2AggId = 
aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::OverwriteAll); - int rank = comm->getRank(); - Kokkos::parallel_for(Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(lno_t i) - { + auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates->GetProcWinner()->getDeviceLocalView(Xpetra::Access::OverwriteAll); + int rank = comm->getRank(); + Kokkos::parallel_for( + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(lno_t i) { procWinner(i, 0) = rank; - if(aggStat(i) == READY) - { - aggStat(i) = AGGREGATED; + if (aggStat(i) == READY) { + aggStat(i) = AGGREGATED; vertex2AggId(i, 0) = labels(i); } }); - numNonAggregatedNodes = 0; - aggregates->SetNumAggregates(numAggs); - } - else + numNonAggregatedNodes = 0; + aggregates->SetNumAggregates(numAggs); + } else { { - { - SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel); - - // LBV on Sept 06 2019: the note below is a little worrisome, - // can we guarantee that MueLu is never used on a non-symmetric - // graph? - // note: just using colinds_view in place of scalar_view_t type - // (it won't be used at all by symbolic SPGEMM) - using graph_t = typename LWGraph_kokkos::local_graph_type; - using KernelHandle = KokkosKernels::Experimental:: + SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel); + + // LBV on Sept 06 2019: the note below is a little worrisome, + // can we guarantee that MueLu is never used on a non-symmetric + // graph? 
+ // note: just using colinds_view in place of scalar_view_t type + // (it won't be used at all by symbolic SPGEMM) + using graph_t = typename LWGraph_kokkos::local_graph_type; + using KernelHandle = KokkosKernels::Experimental:: KokkosKernelsHandle; - KernelHandle kh; - //leave gc algorithm choice as the default - kh.create_distance2_graph_coloring_handle(); - - // get the distance-2 graph coloring handle - auto coloringHandle = kh.get_distance2_graph_coloring_handle(); - - // Set the distance-2 graph coloring algorithm to use. - // Options: - // COLORING_D2_DEFAULT - Let the kernel handle pick the variation - // COLORING_D2_SERIAL - Use the legacy serial-only implementation - // COLORING_D2_VB - Use the parallel vertex based direct method - // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array - // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT - // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest) - if(pL.get("aggregation: deterministic") == true) { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; - } else if(aggAlgo == "serial") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; - } else if(aggAlgo == "default") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl; - } else if(aggAlgo == "vertex based") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl; - } else if(aggAlgo == "vertex based bit set") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl; - } else 
if(aggAlgo == "edge filtering") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl; - } else if(aggAlgo == "net based bit set") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering") - } - - //Create device views for graph rowptrs/colinds - typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs(); - typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries(); - - //run d2 graph coloring - //graph is symmetric so row map/entries and col map/entries are the same - { - SubFactoryMonitor sfm2(*this, "Algo \"Graph Coloring\": KokkosGraph Call", currentLevel);//CMS HACK - KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds); - } + KernelHandle kh; + // leave gc algorithm choice as the default + kh.create_distance2_graph_coloring_handle(); + + // get the distance-2 graph coloring handle + auto coloringHandle = kh.get_distance2_graph_coloring_handle(); + + // Set the distance-2 graph coloring algorithm to use. 
+ // Options: + // COLORING_D2_DEFAULT - Let the kernel handle pick the variation + // COLORING_D2_SERIAL - Use the legacy serial-only implementation + // COLORING_D2_VB - Use the parallel vertex based direct method + // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array + // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT + // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest) + if (pL.get("aggregation: deterministic") == true) { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; + } else if (aggAlgo == "serial") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; + } else if (aggAlgo == "default") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl; + } else if (aggAlgo == "vertex based") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl; + } else if (aggAlgo == "vertex based bit set") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl; + } else if (aggAlgo == "edge filtering") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl; + } else if (aggAlgo == "net based bit set") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unrecognized distance 2 coloring algorithm, valid 
options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering") + } - // extract the colors and store them in the aggregates - aggregates->SetGraphColors(coloringHandle->get_vertex_colors()); - aggregates->SetGraphNumColors(static_cast(coloringHandle->get_num_colors())); + // Create device views for graph rowptrs/colinds + typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs(); + typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries(); - //clean up coloring handle - kh.destroy_distance2_graph_coloring_handle(); + // run d2 graph coloring + // graph is symmetric so row map/entries and col map/entries are the same + { + SubFactoryMonitor sfm2(*this, "Algo \"Graph Coloring\": KokkosGraph Call", currentLevel); // CMS HACK + KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds); } + // extract the colors and store them in the aggregates + aggregates->SetGraphColors(coloringHandle->get_vertex_colors()); + aggregates->SetGraphNumColors(static_cast(coloringHandle->get_num_colors())); + + // clean up coloring handle + kh.destroy_distance2_graph_coloring_handle(); + } + + if (IsPrint(Statistics1)) { + GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl; + } + GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel); + + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + if (IsPrint(Statistics1)) { - GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl; - } - GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; - for (size_t a = 0; a < algos_.size(); a++) { - std::string 
phase = algos_[a]->description(); - SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel); - - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - - if (IsPrint(Statistics1)) { - GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; - GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; - MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); - MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); - - double aggPercent = 100*as(numGlobalAggregated)/as(numGlobalRows); - if (aggPercent > 99.99 && aggPercent < 100.00) { - // Due to round off (for instance, for 140465733/140466897), we could - // get 100.00% display even if there are some remaining nodes. This - // is bad from the users point of view. It is much better to change - // it to display 99.99%. - aggPercent = 99.99; - } - GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed - << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" - << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" - << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; - numGlobalAggregatedPrev = numGlobalAggregated; - numGlobalAggsPrev = numGlobalAggs; + GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; + GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; + MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); + MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); + + double aggPercent = 100 * as(numGlobalAggregated) / as(numGlobalRows); + if (aggPercent > 99.99 && aggPercent < 100.00) { + // Due to round off (for instance, for 140465733/140466897), we could + // get 100.00% 
display even if there are some remaining nodes. This + // is bad from the users point of view. It is much better to change + // it to display 99.99%. + aggPercent = 99.99; } + GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed + << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" + << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" + << " aggregates : " << numGlobalAggs - numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; + numGlobalAggregatedPrev = numGlobalAggregated; + numGlobalAggsPrev = numGlobalAggs; } } + } - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); - - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! 
Error!"); - Set(currentLevel, "Aggregates", aggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); - } + Set(currentLevel, "Aggregates", aggregates); +} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp index 17c7a3297fd6..9fbcbbc45951 100644 --- a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ - #include #include @@ -60,22 +59,22 @@ namespace MueLu { -template +template class UserAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_USERAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" -public: + public: //! @name Constructors/Destructors. //@{ //! Constructor. - UserAggregationFactory() { }; + UserAggregationFactory(){}; //! Destructor. 
- virtual ~UserAggregationFactory() { } + virtual ~UserAggregationFactory() {} RCP GetValidParameterList() const; @@ -101,10 +100,10 @@ class UserAggregationFactory : public SingleLevelFactoryBase { //@} -private: -}; // class UserAggregationFactory + private: +}; // class UserAggregationFactory -} +} // namespace MueLu #define MUELU_USERAGGREGATIONFACTORY_SHORT #endif /* MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp index bc0a418a46c7..45d7f9a229d2 100644 --- a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp @@ -60,87 +60,87 @@ namespace MueLu { - template - RCP UserAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - // input parameters - validParamList->set("filePrefix", "", "The data is read from files of this name: _."); - validParamList->set("fileExt", "", "The data is read from files of this name: _."); +template +RCP UserAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + // input parameters + validParamList->set("filePrefix", "", "The data is read from files of this name: _."); + validParamList->set("fileExt", "", "The data is read from files of this name: _."); + + return validParamList; +} + +template +void UserAggregationFactory::DeclareInput(Level& /* currentLevel */) const {} + +/** + * The function reads aggregate information from a file. + * The file structure is the following: + * * line 1 : + * * line 2+: ... 
+ */ +template +void UserAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + const ParameterList& pL = GetParameterList(); + + RCP > comm = Teuchos::DefaultComm::getComm(); + const int myRank = comm->getRank(); + + std::string fileName = pL.get("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get("fileExt"); + std::ifstream ifs(fileName.c_str()); + TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); + + LO numVertices, numAggregates; + ifs >> numVertices; + TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); + ifs >> numAggregates; + TEUCHOS_TEST_FOR_EXCEPTION(numVertices <= 0, Exceptions::InvalidArgument, "Number of vertices must be > 0"); + TEUCHOS_TEST_FOR_EXCEPTION(numAggregates <= 0, Exceptions::InvalidArgument, "Number of aggregates must be > 0"); + + Xpetra::UnderlyingLib lib = currentLevel.lib(); + const int indexBase = 0; + RCP map = MapFactory::Build(lib, numVertices, indexBase, comm); + + RCP aggregates = rcp(new Aggregates(map)); + aggregates->setObjectLabel("User"); + + aggregates->SetNumAggregates(numAggregates); + + Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + + for (LO i = 0; i < numAggregates; i++) { + int aggSize = 0; + ifs >> aggSize; + + std::vector list(aggSize); + for (int k = 0; k < aggSize; k++) { + // FIXME: File contains GIDs, we need LIDs + // for now, works on a single processor + ifs >> list[k]; + } - return validParamList; - } + // Mark first node as root node for the aggregate + aggregates->SetIsRoot(list[0]); - template - void UserAggregationFactory::DeclareInput(Level& /* currentLevel */) const { } - - /** - * The function reads aggregate information from a file. 
- * The file structure is the following: - * * line 1 : - * * line 2+: ... - */ - template - void UserAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - const ParameterList& pL = GetParameterList(); - - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - const int myRank = comm->getRank(); - - std::string fileName = pL.get("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get("fileExt"); - std::ifstream ifs(fileName.c_str()); - TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); - - LO numVertices, numAggregates; - ifs >> numVertices; - TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); - ifs >> numAggregates; - TEUCHOS_TEST_FOR_EXCEPTION(numVertices <= 0, Exceptions::InvalidArgument, "Number of vertices must be > 0"); - TEUCHOS_TEST_FOR_EXCEPTION(numAggregates <= 0, Exceptions::InvalidArgument, "Number of aggregates must be > 0"); - - Xpetra::UnderlyingLib lib = currentLevel.lib(); - const int indexBase = 0; - RCP map = MapFactory::Build(lib, numVertices, indexBase, comm); - - RCP aggregates = rcp(new Aggregates(map)); - aggregates->setObjectLabel("User"); - - aggregates->SetNumAggregates(numAggregates); - - Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - - for (LO i = 0; i < numAggregates; i++) { - int aggSize = 0; - ifs >> aggSize; - - std::vector list(aggSize); - for (int k = 0; k < aggSize; k++) { - // FIXME: File contains GIDs, we need LIDs - // for now, works on a single processor - ifs >> list[k]; - } - - // Mark first node as root node for the aggregate - aggregates->SetIsRoot(list[0]); - - // Fill vertex2AggId and procWinner structure with information - for (int k = 0; k < aggSize; k++) { - 
vertex2AggId[list[k]] = i; - procWinner [list[k]] = myRank; - } + // Fill vertex2AggId and procWinner structure with information + for (int k = 0; k < aggSize; k++) { + vertex2AggId[list[k]] = i; + procWinner[list[k]] = myRank; } + } - // FIXME: do the proper check whether aggregates cross interprocessor boundary - aggregates->AggregatesCrossProcessors(false); + // FIXME: do the proper check whether aggregates cross interprocessor boundary + aggregates->AggregatesCrossProcessors(false); - Set(currentLevel, "Aggregates", aggregates); + Set(currentLevel, "Aggregates", aggregates); - GetOStream(Statistics0) << aggregates->description() << std::endl; - } + GetOStream(Statistics0) << aggregates->description() << std::endl; +} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_USERAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp b/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp index 6d65bf28102c..799914095ee3 100644 --- a/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp +++ b/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp @@ -51,7 +51,7 @@ #include // Tpetra -#include // default template parameter of many MueLu classes +#include // default template parameter of many MueLu classes // Memory management #include @@ -83,33 +83,33 @@ //! 
Namespace for MueLu classes and methods namespace MueLu { - // import Teuchos memory management classes into MueLu - using Teuchos::arcp; - using Teuchos::arcpFromArrayView; - using Teuchos::arcp_reinterpret_cast; - using Teuchos::Array; - using Teuchos::ArrayRCP; - using Teuchos::ArrayView; - using Teuchos::as; - using Teuchos::null; - using Teuchos::ParameterList; - using Teuchos::rcp; - using Teuchos::RCP; - using Teuchos::rcp_const_cast; - using Teuchos::rcp_dynamic_cast; - using Teuchos::rcpFromRef; - using Teuchos::rcp_implicit_cast; - using Teuchos::rcp_static_cast; +// import Teuchos memory management classes into MueLu +using Teuchos::arcp; +using Teuchos::arcp_reinterpret_cast; +using Teuchos::arcpFromArrayView; +using Teuchos::Array; +using Teuchos::ArrayRCP; +using Teuchos::ArrayView; +using Teuchos::as; +using Teuchos::null; +using Teuchos::ParameterList; +using Teuchos::rcp; +using Teuchos::RCP; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; +using Teuchos::rcp_implicit_cast; +using Teuchos::rcp_static_cast; +using Teuchos::rcpFromRef; - // verbose levels - using Teuchos::VERB_DEFAULT; - using Teuchos::VERB_NONE; - using Teuchos::VERB_LOW; - using Teuchos::VERB_MEDIUM; - using Teuchos::VERB_HIGH; - using Teuchos::VERB_EXTREME; +// verbose levels +using Teuchos::VERB_DEFAULT; +using Teuchos::VERB_EXTREME; +using Teuchos::VERB_HIGH; +using Teuchos::VERB_LOW; +using Teuchos::VERB_MEDIUM; +using Teuchos::VERB_NONE; -} +} // namespace MueLu // This include file defines macros to avoid warnings under CUDA. See github issue #1133. 
#include "Teuchos_CompilerCodeTweakMacros.hpp" diff --git a/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp b/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp index fa9745afbe37..b1d19f73d5e8 100644 --- a/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp +++ b/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp @@ -52,22 +52,21 @@ #include -namespace MueLu -{ +namespace MueLu { - typedef Tpetra::Details::DefaultTypes::scalar_type DefaultScalar; +typedef Tpetra::Details::DefaultTypes::scalar_type DefaultScalar; - typedef int DefaultLocalOrdinal; +typedef int DefaultLocalOrdinal; - #if defined HAVE_MUELU_DEFAULT_GO_LONG - typedef long DefaultGlobalOrdinal; - #elif defined HAVE_MUELU_DEFAULT_GO_LONGLONG - typedef long long DefaultGlobalOrdinal; - #else - typedef int DefaultGlobalOrdinal; - #endif +#if defined HAVE_MUELU_DEFAULT_GO_LONG +typedef long DefaultGlobalOrdinal; +#elif defined HAVE_MUELU_DEFAULT_GO_LONGLONG +typedef long long DefaultGlobalOrdinal; +#else +typedef int DefaultGlobalOrdinal; +#endif - typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType DefaultNode; -} +typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType DefaultNode; +} // namespace MueLu #endif diff --git a/packages/muelu/src/Headers/MueLu_Types.hpp b/packages/muelu/src/Headers/MueLu_Types.hpp index 6ea5d31711d7..26182a928a38 100644 --- a/packages/muelu/src/Headers/MueLu_Types.hpp +++ b/packages/muelu/src/Headers/MueLu_Types.hpp @@ -49,55 +49,55 @@ #include "MueLu_ConfigDefs.hpp" namespace MueLu { - enum CycleType { - VCYCLE, - WCYCLE - }; +enum CycleType { + VCYCLE, + WCYCLE +}; - enum PreOrPost { - PRE = 0x1, - POST = 0x2, - BOTH = 0x3 - }; +enum PreOrPost { + PRE = 0x1, + POST = 0x2, + BOTH = 0x3 +}; - // In the algorithm, aggStat[] = READY/NOTSEL/SELECTED indicates whether a node has been aggregated - enum NodeState { - READY = 1, // indicates that a node is available to be - // selected as a root node of an aggregate +// In the algorithm, 
aggStat[] = READY/NOTSEL/SELECTED indicates whether a node has been aggregated +enum NodeState { + READY = 1, // indicates that a node is available to be + // selected as a root node of an aggregate - NOTSEL = 2, // indicates that a node has been rejected as a root node. - // This could perhaps be because if this node had been - // selected a small aggregate would have resulted - // This is Phase 1 specific + NOTSEL = 2, // indicates that a node has been rejected as a root node. + // This could perhaps be because if this node had been + // selected a small aggregate would have resulted + // This is Phase 1 specific - AGGREGATED = 3, // indicates that a node has been assigned - // to an aggregate + AGGREGATED = 3, // indicates that a node has been assigned + // to an aggregate - ONEPT = 4, // indicates that a node shall be preserved over - // all multigrid levels as 1 point aggregate + ONEPT = 4, // indicates that a node shall be preserved over + // all multigrid levels as 1 point aggregate - IGNORED = 5, // indicates that the node is removed from consideration, - // and is not aggregated + IGNORED = 5, // indicates that the node is removed from consideration, + // and is not aggregated - BOUNDARY = 6, // node is a Dirichlet node - // During aggregation, it is transformed either to AGGREGATED - // or to IGNORED - INTERFACE = 7 // node is chosen as root node on an interface where coordinated - // coarsening across the interface is required. - }; + BOUNDARY = 6, // node is a Dirichlet node + // During aggregation, it is transformed either to AGGREGATED + // or to IGNORED + INTERFACE = 7 // node is chosen as root node on an interface where coordinated + // coarsening across the interface is required. +}; - // This is use by the structured aggregation index manager to keep track of the underlying mesh - // layout. 
- enum IndexingType { - UNCOUPLED = 1, // indicates that the underlying mesh is treated independently from rank to rank +// This is used by the structured aggregation index manager to keep track of the underlying mesh +// layout. +enum IndexingType { + UNCOUPLED = 1, // indicates that the underlying mesh is treated independently from rank to rank - LOCALLEXI = 2, // local lexicographic indexing of the mesh, this is similar to uncoupled but - // extra data is used to compute indices accross ranks + LOCALLEXI = 2, // local lexicographic indexing of the mesh, this is similar to uncoupled but + // extra data is used to compute indices across ranks - GLOBALLEXI = 3 // global lexicographic indexing of the mesh means that the mesh is ordered - // lexicographically accorss and subsequently split among ranks. - }; + GLOBALLEXI = 3 // global lexicographic indexing of the mesh means that the mesh is ordered + // lexicographically across and subsequently split among ranks. +}; -} +} // namespace MueLu -#endif //ifndef MUELU_TYPES_HPP +#endif // ifndef MUELU_TYPES_HPP diff --git a/packages/muelu/src/Headers/MueLu_UseShortNames.hpp b/packages/muelu/src/Headers/MueLu_UseShortNames.hpp index 7a25b031132e..4142deca5b21 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNames.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNames.hpp @@ -81,4 +81,4 @@ //! 
@file MueLu_UseShortNamesOrdinal.hpp -//TODO / NOTE: This file should not be included at the global scope (to avoid name collision) +// TODO / NOTE: This file should not be included at the global scope (to avoid name collision) diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp index 9bc47bfe9b36..63fa18d9a1df 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp @@ -3,124 +3,124 @@ #include #ifdef MUELU_AGGREGATES_SHORT -using Aggregates [[maybe_unused]] = MueLu::Aggregates; +using Aggregates [[maybe_unused]] = MueLu::Aggregates; #endif #ifdef MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT -using AggregationPhase1Algorithm [[maybe_unused]] = MueLu::AggregationPhase1Algorithm; +using AggregationPhase1Algorithm [[maybe_unused]] = MueLu::AggregationPhase1Algorithm; #endif #ifdef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT -using AggregationPhase1Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase1Algorithm_kokkos; +using AggregationPhase1Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase1Algorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT -using AggregationPhase2aAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm; +using AggregationPhase2aAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm; #endif #ifdef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT -using AggregationPhase2aAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm_kokkos; +using AggregationPhase2aAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT -using AggregationPhase2bAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm; +using AggregationPhase2bAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm; #endif #ifdef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT -using 
AggregationPhase2bAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm_kokkos; +using AggregationPhase2bAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT -using AggregationPhase3Algorithm [[maybe_unused]] = MueLu::AggregationPhase3Algorithm; +using AggregationPhase3Algorithm [[maybe_unused]] = MueLu::AggregationPhase3Algorithm; #endif #ifdef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT -using AggregationPhase3Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase3Algorithm_kokkos; +using AggregationPhase3Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase3Algorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT -using AggregationStructuredAlgorithm [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm; +using AggregationStructuredAlgorithm [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm; #endif #ifdef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT -using AggregationStructuredAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm_kokkos; +using AggregationStructuredAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm_kokkos; #endif #ifdef MUELU_AMALGAMATIONINFO_SHORT -using AmalgamationInfo [[maybe_unused]] = MueLu::AmalgamationInfo; +using AmalgamationInfo [[maybe_unused]] = MueLu::AmalgamationInfo; #endif #ifdef MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_SHORT -using GlobalLexicographicIndexManager [[maybe_unused]] = MueLu::GlobalLexicographicIndexManager; +using GlobalLexicographicIndexManager [[maybe_unused]] = MueLu::GlobalLexicographicIndexManager; #endif #ifdef MUELU_GRAPH_SHORT -using Graph [[maybe_unused]] = MueLu::Graph; +using Graph [[maybe_unused]] = MueLu::Graph; #endif #ifdef MUELU_GRAPHBASE_SHORT -using GraphBase [[maybe_unused]] = MueLu::GraphBase; +using GraphBase [[maybe_unused]] = MueLu::GraphBase; #endif #ifdef MUELU_HYBRIDAGGREGATIONFACTORY_SHORT -using 
HybridAggregationFactory [[maybe_unused]] = MueLu::HybridAggregationFactory; +using HybridAggregationFactory [[maybe_unused]] = MueLu::HybridAggregationFactory; #endif #ifdef MUELU_INDEXMANAGER_SHORT -using IndexManager [[maybe_unused]] = MueLu::IndexManager; +using IndexManager [[maybe_unused]] = MueLu::IndexManager; #endif #ifdef MUELU_INDEXMANAGER_KOKKOS_SHORT -using IndexManager_kokkos [[maybe_unused]] = MueLu::IndexManager_kokkos; +using IndexManager_kokkos [[maybe_unused]] = MueLu::IndexManager_kokkos; #endif #ifdef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT -using InterfaceAggregationAlgorithm [[maybe_unused]] = MueLu::InterfaceAggregationAlgorithm; +using InterfaceAggregationAlgorithm [[maybe_unused]] = MueLu::InterfaceAggregationAlgorithm; #endif #ifdef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT -using InterfaceMappingTransferFactory [[maybe_unused]] = MueLu::InterfaceMappingTransferFactory; +using InterfaceMappingTransferFactory [[maybe_unused]] = MueLu::InterfaceMappingTransferFactory; #endif #ifdef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT -using IsolatedNodeAggregationAlgorithm [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm; +using IsolatedNodeAggregationAlgorithm [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm; #endif #ifdef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT -using IsolatedNodeAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm_kokkos; +using IsolatedNodeAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm_kokkos; #endif #ifdef MUELU_ISORROPIAINTERFACE_SHORT -using IsorropiaInterface [[maybe_unused]] = MueLu::IsorropiaInterface; +using IsorropiaInterface [[maybe_unused]] = MueLu::IsorropiaInterface; #endif #ifdef MUELU_LWGRAPH_SHORT -using LWGraph [[maybe_unused]] = MueLu::LWGraph; +using LWGraph [[maybe_unused]] = MueLu::LWGraph; #endif #ifdef MUELU_LWGRAPH_KOKKOS_SHORT -using LWGraph_kokkos [[maybe_unused]] = MueLu::LWGraph_kokkos; +using 
LWGraph_kokkos [[maybe_unused]] = MueLu::LWGraph_kokkos; #endif #ifdef MUELU_LOCALLWGRAPH_KOKKOS_SHORT -using LocalLWGraph_kokkos [[maybe_unused]] = MueLu::LocalLWGraph_kokkos; +using LocalLWGraph_kokkos [[maybe_unused]] = MueLu::LocalLWGraph_kokkos; #endif #ifdef MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT -using LocalLexicographicIndexManager [[maybe_unused]] = MueLu::LocalLexicographicIndexManager; +using LocalLexicographicIndexManager [[maybe_unused]] = MueLu::LocalLexicographicIndexManager; #endif #ifdef MUELU_LOCALORDINALTRANSFERFACTORY_SHORT -using LocalOrdinalTransferFactory [[maybe_unused]] = MueLu::LocalOrdinalTransferFactory; +using LocalOrdinalTransferFactory [[maybe_unused]] = MueLu::LocalOrdinalTransferFactory; #endif #ifdef MUELU_ONEPTAGGREGATIONALGORITHM_SHORT -using OnePtAggregationAlgorithm [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm; +using OnePtAggregationAlgorithm [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm; #endif #ifdef MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT -using OnePtAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm_kokkos; +using OnePtAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm_kokkos; #endif #ifdef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT -using PreserveDirichletAggregationAlgorithm [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm; +using PreserveDirichletAggregationAlgorithm [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm; #endif #ifdef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT -using PreserveDirichletAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm_kokkos; +using PreserveDirichletAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm_kokkos; #endif #ifdef MUELU_PRFACTORY_SHORT -using PRFactory [[maybe_unused]] = MueLu::PRFactory; +using PRFactory [[maybe_unused]] = MueLu::PRFactory; #endif #ifdef 
MUELU_REBALANCEMAPFACTORY_SHORT -using RebalanceMapFactory [[maybe_unused]] = MueLu::RebalanceMapFactory; +using RebalanceMapFactory [[maybe_unused]] = MueLu::RebalanceMapFactory; #endif #ifdef MUELU_REPARTITIONINTERFACE_SHORT -using RepartitionInterface [[maybe_unused]] = MueLu::RepartitionInterface; +using RepartitionInterface [[maybe_unused]] = MueLu::RepartitionInterface; #endif #ifdef MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT -using StructuredAggregationFactory_kokkos [[maybe_unused]] = MueLu::StructuredAggregationFactory_kokkos; +using StructuredAggregationFactory_kokkos [[maybe_unused]] = MueLu::StructuredAggregationFactory_kokkos; #endif #ifdef MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT -using UncoupledAggregationFactory [[maybe_unused]] = MueLu::UncoupledAggregationFactory; +using UncoupledAggregationFactory [[maybe_unused]] = MueLu::UncoupledAggregationFactory; #endif #ifdef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT -using UncoupledAggregationFactory_kokkos [[maybe_unused]] = MueLu::UncoupledAggregationFactory_kokkos; +using UncoupledAggregationFactory_kokkos [[maybe_unused]] = MueLu::UncoupledAggregationFactory_kokkos; #endif #ifdef MUELU_UNCOUPLEDINDEXMANAGER_SHORT -using UncoupledIndexManager [[maybe_unused]] = MueLu::UncoupledIndexManager; +using UncoupledIndexManager [[maybe_unused]] = MueLu::UncoupledIndexManager; #endif #ifdef MUELU_USERAGGREGATIONFACTORY_SHORT -using UserAggregationFactory [[maybe_unused]] = MueLu::UserAggregationFactory; +using UserAggregationFactory [[maybe_unused]] = MueLu::UserAggregationFactory; #endif #ifdef MUELU_FACTORY_SHORT using Factory [[maybe_unused]] = MueLu::Factory; diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index d326324c097f..532f051b27ef 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -3,461 +3,461 @@ #include #ifdef 
MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT -using AdaptiveSaMLParameterListInterpreter [[maybe_unused]] = MueLu::AdaptiveSaMLParameterListInterpreter; +using AdaptiveSaMLParameterListInterpreter [[maybe_unused]] = MueLu::AdaptiveSaMLParameterListInterpreter; #endif #ifdef MUELU_AGGREGATIONEXPORTFACTORY_SHORT -using AggregationExportFactory [[maybe_unused]] = MueLu::AggregationExportFactory; +using AggregationExportFactory [[maybe_unused]] = MueLu::AggregationExportFactory; #endif #ifdef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT -using AggregateQualityEstimateFactory [[maybe_unused]] = MueLu::AggregateQualityEstimateFactory; +using AggregateQualityEstimateFactory [[maybe_unused]] = MueLu::AggregateQualityEstimateFactory; #endif #ifdef MUELU_AMALGAMATIONFACTORY_SHORT -using AmalgamationFactory [[maybe_unused]] = MueLu::AmalgamationFactory; +using AmalgamationFactory [[maybe_unused]] = MueLu::AmalgamationFactory; #endif #ifdef MUELU_AMESOS2SMOOTHER_SHORT -using Amesos2Smoother [[maybe_unused]] = MueLu::Amesos2Smoother; +using Amesos2Smoother [[maybe_unused]] = MueLu::Amesos2Smoother; #endif #ifdef MUELU_AMGXOPERATOR_SHORT -using AMGXOperator [[maybe_unused]] = MueLu::AMGXOperator; +using AMGXOperator [[maybe_unused]] = MueLu::AMGXOperator; #endif #ifdef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_SHORT -using AlgebraicPermutationStrategy [[maybe_unused]] = MueLu::AlgebraicPermutationStrategy; +using AlgebraicPermutationStrategy [[maybe_unused]] = MueLu::AlgebraicPermutationStrategy; #endif #ifdef MUELU_BELOSSMOOTHER_SHORT -using BelosSmoother [[maybe_unused]] = MueLu::BelosSmoother; +using BelosSmoother [[maybe_unused]] = MueLu::BelosSmoother; #endif #ifdef MUELU_BLACKBOXPFACTORY_SHORT -using BlackBoxPFactory [[maybe_unused]] = MueLu::BlackBoxPFactory; +using BlackBoxPFactory [[maybe_unused]] = MueLu::BlackBoxPFactory; #endif #ifdef MUELU_BLOCKEDCOARSEMAPFACTORY_SHORT -using BlockedCoarseMapFactory [[maybe_unused]] = MueLu::BlockedCoarseMapFactory; +using 
BlockedCoarseMapFactory [[maybe_unused]] = MueLu::BlockedCoarseMapFactory; #endif #ifdef MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT -using BlockedCoordinatesTransferFactory [[maybe_unused]] = MueLu::BlockedCoordinatesTransferFactory; +using BlockedCoordinatesTransferFactory [[maybe_unused]] = MueLu::BlockedCoordinatesTransferFactory; #endif #ifdef MUELU_BLOCKEDDIRECTSOLVER_SHORT -using BlockedDirectSolver [[maybe_unused]] = MueLu::BlockedDirectSolver; +using BlockedDirectSolver [[maybe_unused]] = MueLu::BlockedDirectSolver; #endif #ifdef MUELU_BLOCKEDGAUSSSEIDELSMOOTHER_SHORT -using BlockedGaussSeidelSmoother [[maybe_unused]] = MueLu::BlockedGaussSeidelSmoother; +using BlockedGaussSeidelSmoother [[maybe_unused]] = MueLu::BlockedGaussSeidelSmoother; #endif #ifdef MUELU_BLOCKEDJACOBISMOOTHER_SHORT -using BlockedJacobiSmoother [[maybe_unused]] = MueLu::BlockedJacobiSmoother; +using BlockedJacobiSmoother [[maybe_unused]] = MueLu::BlockedJacobiSmoother; #endif #ifdef MUELU_BLOCKEDPFACTORY_SHORT -using BlockedPFactory [[maybe_unused]] = MueLu::BlockedPFactory; +using BlockedPFactory [[maybe_unused]] = MueLu::BlockedPFactory; #endif #ifdef MUELU_BLOCKEDRAPFACTORY_SHORT -using BlockedRAPFactory [[maybe_unused]] = MueLu::BlockedRAPFactory; +using BlockedRAPFactory [[maybe_unused]] = MueLu::BlockedRAPFactory; #endif #ifdef MUELU_BRICKAGGREGATIONFACTORY_SHORT -using BrickAggregationFactory [[maybe_unused]] = MueLu::BrickAggregationFactory; +using BrickAggregationFactory [[maybe_unused]] = MueLu::BrickAggregationFactory; #endif #ifdef MUELU_BRAESSSARAZINSMOOTHER_SHORT -using BraessSarazinSmoother [[maybe_unused]] = MueLu::BraessSarazinSmoother; +using BraessSarazinSmoother [[maybe_unused]] = MueLu::BraessSarazinSmoother; #endif #ifdef MUELU_CGSOLVER_SHORT -using CGSolver [[maybe_unused]] = MueLu::CGSolver; +using CGSolver [[maybe_unused]] = MueLu::CGSolver; #endif #ifdef MUELU_CLASSICALMAPFACTORY_SHORT -using ClassicalMapFactory [[maybe_unused]] = 
MueLu::ClassicalMapFactory; +using ClassicalMapFactory [[maybe_unused]] = MueLu::ClassicalMapFactory; #endif #ifdef MUELU_CLASSICALPFACTORY_SHORT -using ClassicalPFactory [[maybe_unused]] = MueLu::ClassicalPFactory; +using ClassicalPFactory [[maybe_unused]] = MueLu::ClassicalPFactory; #endif #ifdef MUELU_CLONEREPARTITIONINTERFACE_SHORT -using CloneRepartitionInterface [[maybe_unused]] = MueLu::CloneRepartitionInterface; +using CloneRepartitionInterface [[maybe_unused]] = MueLu::CloneRepartitionInterface; #endif #ifdef MUELU_COALESCEDROPFACTORY_SHORT -using CoalesceDropFactory [[maybe_unused]] = MueLu::CoalesceDropFactory; +using CoalesceDropFactory [[maybe_unused]] = MueLu::CoalesceDropFactory; #endif #ifdef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT -using CoalesceDropFactory_kokkos [[maybe_unused]] = MueLu::CoalesceDropFactory_kokkos; +using CoalesceDropFactory_kokkos [[maybe_unused]] = MueLu::CoalesceDropFactory_kokkos; #endif #ifdef MUELU_COARSEMAPFACTORY_SHORT -using CoarseMapFactory [[maybe_unused]] = MueLu::CoarseMapFactory; +using CoarseMapFactory [[maybe_unused]] = MueLu::CoarseMapFactory; #endif #ifdef MUELU_COARSENINGVISUALIZATIONFACTORY_SHORT -using CoarseningVisualizationFactory [[maybe_unused]] = MueLu::CoarseningVisualizationFactory; +using CoarseningVisualizationFactory [[maybe_unused]] = MueLu::CoarseningVisualizationFactory; #endif #ifdef MUELU_COMBINEPFACTORY_SHORT -using CombinePFactory [[maybe_unused]] = MueLu::CombinePFactory; +using CombinePFactory [[maybe_unused]] = MueLu::CombinePFactory; #endif #ifdef MUELU_CONSTRAINT_SHORT -using Constraint [[maybe_unused]] = MueLu::Constraint; +using Constraint [[maybe_unused]] = MueLu::Constraint; #endif #ifdef MUELU_CONSTRAINTFACTORY_SHORT -using ConstraintFactory [[maybe_unused]] = MueLu::ConstraintFactory; +using ConstraintFactory [[maybe_unused]] = MueLu::ConstraintFactory; #endif #ifdef MUELU_COORDINATESTRANSFERFACTORY_SHORT -using CoordinatesTransferFactory [[maybe_unused]] = 
MueLu::CoordinatesTransferFactory; +using CoordinatesTransferFactory [[maybe_unused]] = MueLu::CoordinatesTransferFactory; #endif #ifdef MUELU_COUPLEDRBMFACTORY_SHORT -using CoupledRBMFactory [[maybe_unused]] = MueLu::CoupledRBMFactory; +using CoupledRBMFactory [[maybe_unused]] = MueLu::CoupledRBMFactory; #endif #ifdef MUELU_DEMOFACTORY_SHORT -using DemoFactory [[maybe_unused]] = MueLu::DemoFactory; +using DemoFactory [[maybe_unused]] = MueLu::DemoFactory; #endif #ifdef MUELU_DIRECTSOLVER_SHORT -using DirectSolver [[maybe_unused]] = MueLu::DirectSolver; +using DirectSolver [[maybe_unused]] = MueLu::DirectSolver; #endif #ifdef MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT -using DropNegativeEntriesFactory [[maybe_unused]] = MueLu::DropNegativeEntriesFactory; +using DropNegativeEntriesFactory [[maybe_unused]] = MueLu::DropNegativeEntriesFactory; #endif #ifdef MUELU_EMINPFACTORY_SHORT -using EminPFactory [[maybe_unused]] = MueLu::EminPFactory; +using EminPFactory [[maybe_unused]] = MueLu::EminPFactory; #endif #ifdef MUELU_FACADEBGS2X2_SHORT -using FacadeBGS2x2 [[maybe_unused]] = MueLu::FacadeBGS2x2; +using FacadeBGS2x2 [[maybe_unused]] = MueLu::FacadeBGS2x2; #endif #ifdef MUELU_FACADECLASSBASE_SHORT -using FacadeClassBase [[maybe_unused]] = MueLu::FacadeClassBase; +using FacadeClassBase [[maybe_unused]] = MueLu::FacadeClassBase; #endif #ifdef MUELU_FACADECLASSFACTORY_SHORT -using FacadeClassFactory [[maybe_unused]] = MueLu::FacadeClassFactory; +using FacadeClassFactory [[maybe_unused]] = MueLu::FacadeClassFactory; #endif #ifdef MUELU_FACADESIMPLE_SHORT -using FacadeSimple [[maybe_unused]] = MueLu::FacadeSimple; +using FacadeSimple [[maybe_unused]] = MueLu::FacadeSimple; #endif #ifdef MUELU_FACTORYFACTORY_SHORT -using FactoryFactory [[maybe_unused]] = MueLu::FactoryFactory; +using FactoryFactory [[maybe_unused]] = MueLu::FactoryFactory; #endif #ifdef MUELU_FACTORYMANAGER_SHORT -using FactoryManager [[maybe_unused]] = MueLu::FactoryManager; +using FactoryManager 
[[maybe_unused]] = MueLu::FactoryManager; #endif #ifdef MUELU_FAKESMOOTHERPROTOTYPE_SHORT -using FakeSmootherPrototype [[maybe_unused]] = MueLu::FakeSmootherPrototype; +using FakeSmootherPrototype [[maybe_unused]] = MueLu::FakeSmootherPrototype; #endif #ifdef MUELU_FILTEREDAFACTORY_SHORT -using FilteredAFactory [[maybe_unused]] = MueLu::FilteredAFactory; +using FilteredAFactory [[maybe_unused]] = MueLu::FilteredAFactory; #endif #ifdef MUELU_FINELEVELINPUTDATAFACTORY_SHORT -using FineLevelInputDataFactory [[maybe_unused]] = MueLu::FineLevelInputDataFactory; +using FineLevelInputDataFactory [[maybe_unused]] = MueLu::FineLevelInputDataFactory; #endif #ifdef MUELU_GENERALGEOMETRICPFACTORY_SHORT -using GeneralGeometricPFactory [[maybe_unused]] = MueLu::GeneralGeometricPFactory; +using GeneralGeometricPFactory [[maybe_unused]] = MueLu::GeneralGeometricPFactory; #endif #ifdef MUELU_GENERICRFACTORY_SHORT -using GenericRFactory [[maybe_unused]] = MueLu::GenericRFactory; +using GenericRFactory [[maybe_unused]] = MueLu::GenericRFactory; #endif #ifdef MUELU_GEOMETRICINTERPOLATIONPFACTORY_SHORT -using GeometricInterpolationPFactory [[maybe_unused]] = MueLu::GeometricInterpolationPFactory; +using GeometricInterpolationPFactory [[maybe_unused]] = MueLu::GeometricInterpolationPFactory; #endif #ifdef MUELU_GEOMETRICINTERPOLATIONPFACTORY_KOKKOS_SHORT -using GeometricInterpolationPFactory_kokkos [[maybe_unused]] = MueLu::GeometricInterpolationPFactory_kokkos; +using GeometricInterpolationPFactory_kokkos [[maybe_unused]] = MueLu::GeometricInterpolationPFactory_kokkos; #endif #ifdef MUELU_GMRESSOLVER_SHORT -using GMRESSolver [[maybe_unused]] = MueLu::GMRESSolver; +using GMRESSolver [[maybe_unused]] = MueLu::GMRESSolver; #endif #ifdef MUELU_HIERARCHY_SHORT -using Hierarchy [[maybe_unused]] = MueLu::Hierarchy; +using Hierarchy [[maybe_unused]] = MueLu::Hierarchy; #endif #ifdef MUELU_HIERARCHYMANAGER_SHORT -using HierarchyManager [[maybe_unused]] = MueLu::HierarchyManager; +using 
HierarchyManager [[maybe_unused]] = MueLu::HierarchyManager; #endif #ifdef MUELU_HIERARCHYFACTORY_SHORT -using HierarchyFactory [[maybe_unused]] = MueLu::HierarchyFactory; +using HierarchyFactory [[maybe_unused]] = MueLu::HierarchyFactory; #endif #ifdef MUELU_HIERARCHYUTILS_SHORT -using HierarchyUtils [[maybe_unused]] = MueLu::HierarchyUtils; +using HierarchyUtils [[maybe_unused]] = MueLu::HierarchyUtils; #endif #ifdef MUELU_INTERFACEAGGREGATIONFACTORY_SHORT -using InterfaceAggregationFactory [[maybe_unused]] = MueLu::InterfaceAggregationFactory; +using InterfaceAggregationFactory [[maybe_unused]] = MueLu::InterfaceAggregationFactory; #endif #ifdef MUELU_IFPACK2SMOOTHER_SHORT -using Ifpack2Smoother [[maybe_unused]] = MueLu::Ifpack2Smoother; +using Ifpack2Smoother [[maybe_unused]] = MueLu::Ifpack2Smoother; #endif #ifdef MUELU_INDEFBLOCKEDDIAGONALSMOOTHER_SHORT -using IndefBlockedDiagonalSmoother [[maybe_unused]] = MueLu::IndefBlockedDiagonalSmoother; +using IndefBlockedDiagonalSmoother [[maybe_unused]] = MueLu::IndefBlockedDiagonalSmoother; #endif #ifdef MUELU_INITIALBLOCKNUMBERFACTORY_SHORT -using InitialBlockNumberFactory [[maybe_unused]] = MueLu::InitialBlockNumberFactory; +using InitialBlockNumberFactory [[maybe_unused]] = MueLu::InitialBlockNumberFactory; #endif #ifdef MUELU_INTREPIDPCOARSENFACTORY_SHORT -using IntrepidPCoarsenFactory [[maybe_unused]] = MueLu::IntrepidPCoarsenFactory; +using IntrepidPCoarsenFactory [[maybe_unused]] = MueLu::IntrepidPCoarsenFactory; #endif #ifdef MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT -using InverseApproximationFactory [[maybe_unused]] = MueLu::InverseApproximationFactory; +using InverseApproximationFactory [[maybe_unused]] = MueLu::InverseApproximationFactory; #endif #ifdef MUELU_LINEDETECTIONFACTORY_SHORT -using LineDetectionFactory [[maybe_unused]] = MueLu::LineDetectionFactory; +using LineDetectionFactory [[maybe_unused]] = MueLu::LineDetectionFactory; #endif #ifdef MUELU_LOCALPERMUTATIONSTRATEGY_SHORT -using 
LocalPermutationStrategy [[maybe_unused]] = MueLu::LocalPermutationStrategy; +using LocalPermutationStrategy [[maybe_unused]] = MueLu::LocalPermutationStrategy; #endif #ifdef MUELU_LOWPRECISIONFACTORY_SHORT -using LowPrecisionFactory [[maybe_unused]] = MueLu::LowPrecisionFactory; +using LowPrecisionFactory [[maybe_unused]] = MueLu::LowPrecisionFactory; #endif #ifdef MUELU_MAPTRANSFERFACTORY_SHORT -using MapTransferFactory [[maybe_unused]] = MueLu::MapTransferFactory; +using MapTransferFactory [[maybe_unused]] = MueLu::MapTransferFactory; #endif #ifdef MUELU_MATRIXANALYSISFACTORY_SHORT -using MatrixAnalysisFactory [[maybe_unused]] = MueLu::MatrixAnalysisFactory; +using MatrixAnalysisFactory [[maybe_unused]] = MueLu::MatrixAnalysisFactory; #endif #ifdef MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT -using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFactory; +using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFactory; #endif #ifdef MUELU_MERGEDSMOOTHER_SHORT -using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; +using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; #endif #ifdef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -using MLParameterListInterpreter [[maybe_unused]] = MueLu::MLParameterListInterpreter; +using MLParameterListInterpreter [[maybe_unused]] = MueLu::MLParameterListInterpreter; #endif #ifdef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT -using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; +using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; #endif #ifdef MUELU_NOTAYAGGREGATIONFACTORY_SHORT -using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory; +using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory; #endif #ifdef MUELU_NULLSPACEFACTORY_SHORT -using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; +using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; 
#endif #ifdef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; +using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; #endif #ifdef MUELU_NULLSPACEPRESMOOTHFACTORY_SHORT -using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; +using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; #endif #ifdef MUELU_PARAMETERLISTINTERPRETER_SHORT -using ParameterListInterpreter [[maybe_unused]] = MueLu::ParameterListInterpreter; +using ParameterListInterpreter [[maybe_unused]] = MueLu::ParameterListInterpreter; #endif #ifdef MUELU_PATTERNFACTORY_SHORT -using PatternFactory [[maybe_unused]] = MueLu::PatternFactory; +using PatternFactory [[maybe_unused]] = MueLu::PatternFactory; #endif #ifdef MUELU_PERFUTILS_SHORT -using PerfUtils [[maybe_unused]] = MueLu::PerfUtils; +using PerfUtils [[maybe_unused]] = MueLu::PerfUtils; #endif #ifdef MUELU_PERFMODELS_SHORT -using PerfModels [[maybe_unused]] = MueLu::PerfModels; +using PerfModels [[maybe_unused]] = MueLu::PerfModels; #endif #ifdef MUELU_PERMUTATIONFACTORY_SHORT -using PermutationFactory [[maybe_unused]] = MueLu::PermutationFactory; +using PermutationFactory [[maybe_unused]] = MueLu::PermutationFactory; #endif #ifdef MUELU_PERMUTINGSMOOTHER_SHORT -using PermutingSmoother [[maybe_unused]] = MueLu::PermutingSmoother; +using PermutingSmoother [[maybe_unused]] = MueLu::PermutingSmoother; #endif #ifdef MUELU_PGPFACTORY_SHORT -using PgPFactory [[maybe_unused]] = MueLu::PgPFactory; +using PgPFactory [[maybe_unused]] = MueLu::PgPFactory; #endif #ifdef MUELU_PREDROPFUNCTIONBASECLASS_SHORT -using PreDropFunctionBaseClass [[maybe_unused]] = MueLu::PreDropFunctionBaseClass; +using PreDropFunctionBaseClass [[maybe_unused]] = MueLu::PreDropFunctionBaseClass; #endif #ifdef MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -using PreDropFunctionConstVal [[maybe_unused]] = MueLu::PreDropFunctionConstVal; +using 
PreDropFunctionConstVal [[maybe_unused]] = MueLu::PreDropFunctionConstVal; #endif #ifdef MUELU_PROJECTORSMOOTHER_SHORT -using ProjectorSmoother [[maybe_unused]] = MueLu::ProjectorSmoother; +using ProjectorSmoother [[maybe_unused]] = MueLu::ProjectorSmoother; #endif #ifdef MUELU_RAPFACTORY_SHORT -using RAPFactory [[maybe_unused]] = MueLu::RAPFactory; +using RAPFactory [[maybe_unused]] = MueLu::RAPFactory; #endif #ifdef MUELU_RAPSHIFTFACTORY_SHORT -using RAPShiftFactory [[maybe_unused]] = MueLu::RAPShiftFactory; +using RAPShiftFactory [[maybe_unused]] = MueLu::RAPShiftFactory; #endif #ifdef MUELU_REBALANCEACFACTORY_SHORT -using RebalanceAcFactory [[maybe_unused]] = MueLu::RebalanceAcFactory; +using RebalanceAcFactory [[maybe_unused]] = MueLu::RebalanceAcFactory; #endif #ifdef MUELU_REBALANCEBLOCKACFACTORY_SHORT -using RebalanceBlockAcFactory [[maybe_unused]] = MueLu::RebalanceBlockAcFactory; +using RebalanceBlockAcFactory [[maybe_unused]] = MueLu::RebalanceBlockAcFactory; #endif #ifdef MUELU_REBALANCEBLOCKINTERPOLATIONFACTORY_SHORT -using RebalanceBlockInterpolationFactory [[maybe_unused]] = MueLu::RebalanceBlockInterpolationFactory; +using RebalanceBlockInterpolationFactory [[maybe_unused]] = MueLu::RebalanceBlockInterpolationFactory; #endif #ifdef MUELU_REBALANCEBLOCKRESTRICTIONFACTORY_SHORT -using RebalanceBlockRestrictionFactory [[maybe_unused]] = MueLu::RebalanceBlockRestrictionFactory; +using RebalanceBlockRestrictionFactory [[maybe_unused]] = MueLu::RebalanceBlockRestrictionFactory; #endif #ifdef MUELU_REBALANCETRANSFERFACTORY_SHORT -using RebalanceTransferFactory [[maybe_unused]] = MueLu::RebalanceTransferFactory; +using RebalanceTransferFactory [[maybe_unused]] = MueLu::RebalanceTransferFactory; #endif #ifdef MUELU_REFMAXWELLSMOOTHER_SHORT -using RefMaxwellSmoother [[maybe_unused]] = MueLu::RefMaxwellSmoother; +using RefMaxwellSmoother [[maybe_unused]] = MueLu::RefMaxwellSmoother; #endif #ifdef MUELU_REGIONRFACTORY_SHORT -using RegionRFactory 
[[maybe_unused]] = MueLu::RegionRFactory; +using RegionRFactory [[maybe_unused]] = MueLu::RegionRFactory; #endif #ifdef MUELU_REGIONRFACTORY_KOKKOS_SHORT -using RegionRFactory_kokkos [[maybe_unused]] = MueLu::RegionRFactory_kokkos; +using RegionRFactory_kokkos [[maybe_unused]] = MueLu::RegionRFactory_kokkos; #endif #ifdef MUELU_REITZINGERPFACTORY_SHORT -using ReitzingerPFactory [[maybe_unused]] = MueLu::ReitzingerPFactory; +using ReitzingerPFactory [[maybe_unused]] = MueLu::ReitzingerPFactory; #endif #ifdef MUELU_REORDERBLOCKAFACTORY_SHORT -using ReorderBlockAFactory [[maybe_unused]] = MueLu::ReorderBlockAFactory; +using ReorderBlockAFactory [[maybe_unused]] = MueLu::ReorderBlockAFactory; #endif #ifdef MUELU_REPARTITIONFACTORY_SHORT -using RepartitionFactory [[maybe_unused]] = MueLu::RepartitionFactory; +using RepartitionFactory [[maybe_unused]] = MueLu::RepartitionFactory; #endif #ifdef MUELU_REPARTITIONBLOCKDIAGONALFACTORY_SHORT -using RepartitionBlockDiagonalFactory [[maybe_unused]] = MueLu::RepartitionBlockDiagonalFactory; +using RepartitionBlockDiagonalFactory [[maybe_unused]] = MueLu::RepartitionBlockDiagonalFactory; #endif #ifdef MUELU_REPARTITIONHEURISTICFACTORY_SHORT -using RepartitionHeuristicFactory [[maybe_unused]] = MueLu::RepartitionHeuristicFactory; +using RepartitionHeuristicFactory [[maybe_unused]] = MueLu::RepartitionHeuristicFactory; #endif #ifdef MUELU_REPLICATEPFACTORY_SHORT -using ReplicatePFactory [[maybe_unused]] = MueLu::ReplicatePFactory; +using ReplicatePFactory [[maybe_unused]] = MueLu::ReplicatePFactory; #endif #ifdef MUELU_RIGIDBODYMODEFACTORY_SHORT -using RigidBodyModeFactory [[maybe_unused]] = MueLu::RigidBodyModeFactory; +using RigidBodyModeFactory [[maybe_unused]] = MueLu::RigidBodyModeFactory; #endif #ifdef MUELU_SAPFACTORY_SHORT -using SaPFactory [[maybe_unused]] = MueLu::SaPFactory; +using SaPFactory [[maybe_unused]] = MueLu::SaPFactory; #endif #ifdef MUELU_SAPFACTORY_KOKKOS_SHORT -using SaPFactory_kokkos [[maybe_unused]] = 
MueLu::SaPFactory_kokkos; +using SaPFactory_kokkos [[maybe_unused]] = MueLu::SaPFactory_kokkos; #endif #ifdef MUELU_SCALEDNULLSPACEFACTORY_SHORT -using ScaledNullspaceFactory [[maybe_unused]] = MueLu::ScaledNullspaceFactory; +using ScaledNullspaceFactory [[maybe_unused]] = MueLu::ScaledNullspaceFactory; #endif #ifdef MUELU_SCHURCOMPLEMENTFACTORY_SHORT -using SchurComplementFactory [[maybe_unused]] = MueLu::SchurComplementFactory; +using SchurComplementFactory [[maybe_unused]] = MueLu::SchurComplementFactory; #endif #ifdef MUELU_SEGREGATEDAFACTORY_SHORT -using SegregatedAFactory [[maybe_unused]] = MueLu::SegregatedAFactory; +using SegregatedAFactory [[maybe_unused]] = MueLu::SegregatedAFactory; #endif #ifdef MUELU_SHIFTEDLAPLACIAN_SHORT -using ShiftedLaplacian [[maybe_unused]] = MueLu::ShiftedLaplacian; +using ShiftedLaplacian [[maybe_unused]] = MueLu::ShiftedLaplacian; #endif #ifdef MUELU_SHIFTEDLAPLACIANOPERATOR_SHORT -using ShiftedLaplacianOperator [[maybe_unused]] = MueLu::ShiftedLaplacianOperator; +using ShiftedLaplacianOperator [[maybe_unused]] = MueLu::ShiftedLaplacianOperator; #endif #ifdef MUELU_SIMPLESMOOTHER_SHORT -using SimpleSmoother [[maybe_unused]] = MueLu::SimpleSmoother; +using SimpleSmoother [[maybe_unused]] = MueLu::SimpleSmoother; #endif #ifdef MUELU_SMOOTHER_SHORT -using Smoother [[maybe_unused]] = MueLu::Smoother; +using Smoother [[maybe_unused]] = MueLu::Smoother; #endif #ifdef MUELU_SMOOTHERBASE_SHORT -using SmootherBase [[maybe_unused]] = MueLu::SmootherBase; +using SmootherBase [[maybe_unused]] = MueLu::SmootherBase; #endif #ifdef MUELU_SMOOTHERFACTORY_SHORT -using SmootherFactory [[maybe_unused]] = MueLu::SmootherFactory; +using SmootherFactory [[maybe_unused]] = MueLu::SmootherFactory; #endif #ifdef MUELU_SMOOTHERPROTOTYPE_SHORT -using SmootherPrototype [[maybe_unused]] = MueLu::SmootherPrototype; +using SmootherPrototype [[maybe_unused]] = MueLu::SmootherPrototype; #endif #ifdef MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT -using 
SmooVecCoalesceDropFactory [[maybe_unused]] = MueLu::SmooVecCoalesceDropFactory; +using SmooVecCoalesceDropFactory [[maybe_unused]] = MueLu::SmooVecCoalesceDropFactory; #endif #ifdef MUELU_SOLVERBASE_SHORT -using SolverBase [[maybe_unused]] = MueLu::SolverBase; +using SolverBase [[maybe_unused]] = MueLu::SolverBase; #endif #ifdef MUELU_STEEPESTDESCENTSOLVER_SHORT -using SteepestDescentSolver [[maybe_unused]] = MueLu::SteepestDescentSolver; +using SteepestDescentSolver [[maybe_unused]] = MueLu::SteepestDescentSolver; #endif #ifdef MUELU_STRATIMIKOSSMOOTHER_SHORT -using StratimikosSmoother [[maybe_unused]] = MueLu::StratimikosSmoother; +using StratimikosSmoother [[maybe_unused]] = MueLu::StratimikosSmoother; #endif #ifdef MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT -using StructuredAggregationFactory [[maybe_unused]] = MueLu::StructuredAggregationFactory; +using StructuredAggregationFactory [[maybe_unused]] = MueLu::StructuredAggregationFactory; #endif #ifdef MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT -using StructuredLineDetectionFactory [[maybe_unused]] = MueLu::StructuredLineDetectionFactory; +using StructuredLineDetectionFactory [[maybe_unused]] = MueLu::StructuredLineDetectionFactory; #endif #ifdef MUELU_SUBBLOCKAFACTORY_SHORT -using SubBlockAFactory [[maybe_unused]] = MueLu::SubBlockAFactory; +using SubBlockAFactory [[maybe_unused]] = MueLu::SubBlockAFactory; #endif #ifdef MUELU_TEKOSMOOTHER_SHORT -using TekoSmoother [[maybe_unused]] = MueLu::TekoSmoother; +using TekoSmoother [[maybe_unused]] = MueLu::TekoSmoother; #endif #ifdef MUELU_TENTATIVEPFACTORY_SHORT -using TentativePFactory [[maybe_unused]] = MueLu::TentativePFactory; +using TentativePFactory [[maybe_unused]] = MueLu::TentativePFactory; #endif #ifdef MUELU_TENTATIVEPFACTORY_KOKKOS_SHORT -using TentativePFactory_kokkos [[maybe_unused]] = MueLu::TentativePFactory_kokkos; +using TentativePFactory_kokkos [[maybe_unused]] = MueLu::TentativePFactory_kokkos; #endif #ifdef MUELU_MATRIXFREETENTATIVEP_SHORT -using 
MatrixFreeTentativeP [[maybe_unused]] = MueLu::MatrixFreeTentativeP; +using MatrixFreeTentativeP [[maybe_unused]] = MueLu::MatrixFreeTentativeP; #endif #ifdef MUELU_MATRIXFREETENTATIVEPFACTORY_SHORT -using MatrixFreeTentativePFactory [[maybe_unused]] = MueLu::MatrixFreeTentativePFactory; +using MatrixFreeTentativePFactory [[maybe_unused]] = MueLu::MatrixFreeTentativePFactory; #endif #ifdef MUELU_THRESHOLDAFILTERFACTORY_SHORT -using ThresholdAFilterFactory [[maybe_unused]] = MueLu::ThresholdAFilterFactory; +using ThresholdAFilterFactory [[maybe_unused]] = MueLu::ThresholdAFilterFactory; #endif #ifdef MUELU_TOGGLECOORDINATESTRANSFERFACTORY_SHORT -using ToggleCoordinatesTransferFactory [[maybe_unused]] = MueLu::ToggleCoordinatesTransferFactory; +using ToggleCoordinatesTransferFactory [[maybe_unused]] = MueLu::ToggleCoordinatesTransferFactory; #endif #ifdef MUELU_TOGGLEPFACTORY_SHORT -using TogglePFactory [[maybe_unused]] = MueLu::TogglePFactory; +using TogglePFactory [[maybe_unused]] = MueLu::TogglePFactory; #endif #ifdef MUELU_TOPRAPFACTORY_SHORT -using TopRAPFactory [[maybe_unused]] = MueLu::TopRAPFactory; +using TopRAPFactory [[maybe_unused]] = MueLu::TopRAPFactory; #endif #ifdef MUELU_TOPSMOOTHERFACTORY_SHORT -using TopSmootherFactory [[maybe_unused]] = MueLu::TopSmootherFactory; +using TopSmootherFactory [[maybe_unused]] = MueLu::TopSmootherFactory; #endif #ifdef MUELU_TPETRAOPERATOR_SHORT -using TpetraOperator [[maybe_unused]] = MueLu::TpetraOperator; +using TpetraOperator [[maybe_unused]] = MueLu::TpetraOperator; #endif #ifdef MUELU_TRANSPFACTORY_SHORT -using TransPFactory [[maybe_unused]] = MueLu::TransPFactory; +using TransPFactory [[maybe_unused]] = MueLu::TransPFactory; #endif #ifdef MUELU_RFROMP_OR_TRANSP_SHORT -using RfromP_Or_TransP [[maybe_unused]] = MueLu::RfromP_Or_TransP; +using RfromP_Or_TransP [[maybe_unused]] = MueLu::RfromP_Or_TransP; #endif #ifdef MUELU_TRILINOSSMOOTHER_SHORT -using TrilinosSmoother [[maybe_unused]] = MueLu::TrilinosSmoother; 
+using TrilinosSmoother [[maybe_unused]] = MueLu::TrilinosSmoother; #endif #ifdef MUELU_UNSMOOSHFACTORY_SHORT -using UnsmooshFactory [[maybe_unused]] = MueLu::UnsmooshFactory; +using UnsmooshFactory [[maybe_unused]] = MueLu::UnsmooshFactory; #endif #ifdef MUELU_USERPFACTORY_SHORT -using UserPFactory [[maybe_unused]] = MueLu::UserPFactory; +using UserPFactory [[maybe_unused]] = MueLu::UserPFactory; #endif #ifdef MUELU_UTILITIES_SHORT -using Utilities [[maybe_unused]] = MueLu::Utilities; +using Utilities [[maybe_unused]] = MueLu::Utilities; #endif #ifdef MUELU_UTILITIESBASE_SHORT -using UtilitiesBase [[maybe_unused]] = MueLu::UtilitiesBase; +using UtilitiesBase [[maybe_unused]] = MueLu::UtilitiesBase; #endif #ifdef MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT -using VariableDofLaplacianFactory [[maybe_unused]] = MueLu::VariableDofLaplacianFactory; +using VariableDofLaplacianFactory [[maybe_unused]] = MueLu::VariableDofLaplacianFactory; #endif #ifdef MUELU_SEMICOARSENPFACTORY_SHORT -using SemiCoarsenPFactory [[maybe_unused]] = MueLu::SemiCoarsenPFactory; +using SemiCoarsenPFactory [[maybe_unused]] = MueLu::SemiCoarsenPFactory; #endif #ifdef MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT -using SemiCoarsenPFactory_kokkos [[maybe_unused]] = MueLu::SemiCoarsenPFactory_kokkos; +using SemiCoarsenPFactory_kokkos [[maybe_unused]] = MueLu::SemiCoarsenPFactory_kokkos; #endif #ifdef MUELU_UZAWASMOOTHER_SHORT -using UzawaSmoother [[maybe_unused]] = MueLu::UzawaSmoother; +using UzawaSmoother [[maybe_unused]] = MueLu::UzawaSmoother; #endif #ifdef MUELU_VISUALIZATIONHELPERS_SHORT -using VisualizationHelpers [[maybe_unused]] = MueLu::VisualizationHelpers; +using VisualizationHelpers [[maybe_unused]] = MueLu::VisualizationHelpers; #endif #ifdef MUELU_ZEROSUBBLOCKAFACTORY_SHORT -using ZeroSubBlockAFactory [[maybe_unused]] = MueLu::ZeroSubBlockAFactory; +using ZeroSubBlockAFactory [[maybe_unused]] = MueLu::ZeroSubBlockAFactory; #endif #ifdef MUELU_ZOLTANINTERFACE_SHORT -using ZoltanInterface 
[[maybe_unused]] = MueLu::ZoltanInterface; +using ZoltanInterface [[maybe_unused]] = MueLu::ZoltanInterface; #endif #ifdef MUELU_ZOLTAN2INTERFACE_SHORT -using Zoltan2Interface [[maybe_unused]] = MueLu::Zoltan2Interface; +using Zoltan2Interface [[maybe_unused]] = MueLu::Zoltan2Interface; #endif #ifdef MUELU_NODEPARTITIONINTERFACE_SHORT -using NodePartitionInterface [[maybe_unused]] = MueLu::NodePartitionInterface; +using NodePartitionInterface [[maybe_unused]] = MueLu::NodePartitionInterface; #endif #ifdef MUELU_XPETRAOPERATOR_SHORT -using XpetraOperator [[maybe_unused]] = MueLu::XpetraOperator; +using XpetraOperator [[maybe_unused]] = MueLu::XpetraOperator; #endif #ifdef MUELU_REFMAXWELL_SHORT -using RefMaxwell [[maybe_unused]] = MueLu::RefMaxwell; +using RefMaxwell [[maybe_unused]] = MueLu::RefMaxwell; #endif #ifdef MUELU_MAXWELL1_SHORT -using Maxwell1 [[maybe_unused]] = MueLu::Maxwell1; +using Maxwell1 [[maybe_unused]] = MueLu::Maxwell1; #endif #ifdef MUELU_MULTIPHYS_SHORT -using MultiPhys [[maybe_unused]] = MueLu::MultiPhys; +using MultiPhys [[maybe_unused]] = MueLu::MultiPhys; #endif #ifdef MUELU_MAXWELL_UTILS_SHORT -using Maxwell_Utils [[maybe_unused]] = MueLu::Maxwell_Utils; +using Maxwell_Utils [[maybe_unused]] = MueLu::Maxwell_Utils; #endif #ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT -typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; +typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; #endif #ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT -typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; +typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; #endif #ifdef MUELU_MATLABSMOOTHER_SHORT -typedef MueLu::MatlabSmoother MatlabSmoother; +typedef MueLu::MatlabSmoother MatlabSmoother; #endif diff --git a/packages/muelu/src/Headers/MueLu_Version.hpp b/packages/muelu/src/Headers/MueLu_Version.hpp index 0fbda392c252..6eb87ab59a83 100644 --- a/packages/muelu/src/Headers/MueLu_Version.hpp +++ 
b/packages/muelu/src/Headers/MueLu_Version.hpp @@ -55,10 +55,10 @@ namespace MueLu { - inline std::string const Version() { - return("MueLu development"); - } +inline std::string const Version() { + return ("MueLu development"); +} -} // namespace MueLu +} // namespace MueLu -#endif //ifndef MUELU_VERSION_HPP +#endif // ifndef MUELU_VERSION_HPP diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_decl.hpp index 8885d500ed3c..d201111eb8b2 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_decl.hpp @@ -55,37 +55,34 @@ namespace MueLu { - template - class FacadeBGS2x2 : public FacadeClassBase { +template +class FacadeBGS2x2 : public FacadeClassBase { #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeBGS2x2(); + //! Constructor. + FacadeBGS2x2(); - //! Destructor. - virtual ~FacadeBGS2x2() { } + //! Destructor. + virtual ~FacadeBGS2x2() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass interpreter. + /*! @brief Set parameter list for FacadeClass interpreter. - @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. - */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - - private: - - }; - -} // namespace MueLu + @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. 
+ */ + Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + private: +}; +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_def.hpp index f96866f4ee6e..6c38b3f54cbd 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeBGS2x2_def.hpp @@ -50,349 +50,343 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeBGS2x2_decl.hpp" namespace MueLu { - template - FacadeBGS2x2::FacadeBGS2x2() { - } - - - template - Teuchos::RCP FacadeBGS2x2::SetParameterList(const ParameterList& paramList) { - - // obtain ParameterList with default input parameters for this facade class - // Note all parameters are of type string (we use it for string replacement) - std::string defaultString = -"" -"" -"" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" -; - Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); - // validate user input parameters (and set defaults if necessary) - Teuchos::ParameterList inputParameters = paramList; - inputParameters.validateParametersAndSetDefaults(*defaultList); - TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeBGS2x2: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); - - // create copy of template string which is updated with in-place string replacements - // template string for preconditioner layout (factory based parameters) - std::string finalString = +template +FacadeBGS2x2::FacadeBGS2x2() { +} -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -"" -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" - ; +template +Teuchos::RCP FacadeBGS2x2::SetParameterList(const ParameterList& paramList) { + // obtain ParameterList with default input parameters for this facade class + // Note all parameters are of type string (we use it for string replacement) + std::string defaultString = + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""; + Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); + // validate user input parameters (and set defaults if necessary) + Teuchos::ParameterList inputParameters = paramList; + 
inputParameters.validateParametersAndSetDefaults(*defaultList); + TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeBGS2x2: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - // logical code for more complicated distinctions + // create copy of template string which is updated with in-place string replacements + // template string for preconditioner layout (factory based parameters) + std::string finalString = + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + "" + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + ""; - std::string smoother1 = inputParameters.get("Block 1: smoother"); - if(smoother1 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); 
- } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." << std::endl; - } + // logical code for more complicated distinctions - std::string smoother2 = inputParameters.get("Block 2: smoother"); - if(smoother2 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); - } else { - 
this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." << std::endl; - } + std::string smoother1 = inputParameters.get("Block 1: smoother"); + if (smoother1 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - if(inputParameters.get("Block 1: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); - } else { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); - } - if(inputParameters.get("Block 2: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); - } else { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); - } + std::string smoother2 = inputParameters.get("Block 2: smoother"); + if (smoother2 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - // end logical code + if (inputParameters.get("Block 1: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); + } else { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); + } + if (inputParameters.get("Block 2: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); + } else { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); + } - // loop over all input parameters - for(Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { - // form replacement string - std::string par_name = inputParameters.name(it); - std::stringstream ss; - ss << "XXX" << par_name << "YYY"; + // end logical code - // update final string with parameters - Teuchos::ParameterEntry par_entry = inputParameters.entry(it); - this->ReplaceString(finalString, - ss.str(), Teuchos::toString(par_entry.getAny())); - } + // loop over all input parameters + for (Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { + // form replacement string + std::string par_name = inputParameters.name(it); + std::stringstream ss; + ss << "XXX" << par_name << "YYY"; - Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); - return ret; + // update final string with parameters + Teuchos::ParameterEntry par_entry = inputParameters.entry(it); + this->ReplaceString(finalString, + ss.str(), Teuchos::toString(par_entry.getAny())); } -} // end namespace MueLu -#endif // 
PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ + Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); + return ret; +} + +} // end namespace MueLu +#endif // PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp index f9d9d8b9ff5b..ff5f1d8a40e0 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp @@ -51,52 +51,48 @@ namespace MueLu { - template - class FacadeClassBase - : public virtual BaseClass{ +template +class FacadeClassBase + : public virtual BaseClass { #undef MUELU_FACADECLASSBASE_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeClassBase(); + //! Constructor. + FacadeClassBase(); - //! Destructor. - virtual ~FacadeClassBase() { } + //! Destructor. + virtual ~FacadeClassBase() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass (abstract member). + /*! @brief Set parameter list for FacadeClass (abstract member). - @param[in] paramList: ParameterList containing the MueLu parameters. - */ - virtual Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList) = 0; + @param[in] paramList: ParameterList containing the MueLu parameters. + */ + virtual Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList) = 0; - protected: - - /*! 
@brief Replace all occurrences of search string "search" by the string in "replace" given the string "subject" - */ - std::string ReplaceString(std::string& subject, const std::string& search, const std::string& replace) { - size_t pos = 0; - while ((pos = subject.find(search, pos)) != std::string::npos) { - subject.replace(pos, search.length(), replace); - pos += replace.length(); - } - return subject; + protected: + /*! @brief Replace all occurrences of search string "search" by the string in "replace" given the string "subject" + */ + std::string ReplaceString(std::string& subject, const std::string& search, const std::string& replace) { + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); } + return subject; + } +}; - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_FACADECLASSBASE_SHORT - - #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp index fd2375c1ccc2..6ef823b40fcb 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp @@ -50,15 +50,14 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeClassBase_decl.hpp" namespace MueLu { - template - FacadeClassBase::FacadeClassBase() { - } +template +FacadeClassBase::FacadeClassBase() { } +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp index 368c8ed6bd78..e3857cc4537a 100644 --- 
a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp @@ -56,51 +56,47 @@ namespace MueLu { - template - class FacadeClassFactory - : public virtual BaseClass{ +template +class FacadeClassFactory + : public virtual BaseClass { #undef MUELU_FACADECLASSFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeClassFactory(); + //! Constructor. + FacadeClassFactory(); - //! Destructor. - virtual ~FacadeClassFactory() { } + //! Destructor. + virtual ~FacadeClassFactory() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClassFactory interpreter. + /*! @brief Set parameter list for FacadeClassFactory interpreter. - @param[in] paramList: ParameterList containing the MueLu parameters. - */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + @param[in] paramList: ParameterList containing the MueLu parameters. + */ + Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - /*! @brief Register new facade class - * - * Register new externally provided facade class in FacadeClassFactory - * - * @param[in] name: name that is used to access Facade class - * @param[in] facadeclass: RCP pointer to facade class instance - */ - void RegisterFacadeClass(std::string name, Teuchos::RCP facadeclass) { - facadeClasses_[name] = facadeclass; - } + /*! 
@brief Register new facade class + * + * Register new externally provided facade class in FacadeClassFactory + * + * @param[in] name: name that is used to access Facade class + * @param[in] facadeclass: RCP pointer to facade class instance + */ + void RegisterFacadeClass(std::string name, Teuchos::RCP facadeclass) { + facadeClasses_[name] = facadeclass; + } - private: + private: + std::map > facadeClasses_; +}; - std::map > facadeClasses_; - - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_FACADECLASSFACTORY_SHORT - - #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp index 137ed1d9cfd1..787fa95ec8a6 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp @@ -50,7 +50,6 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeClassBase.hpp" @@ -61,34 +60,32 @@ namespace MueLu { - template - FacadeClassFactory::FacadeClassFactory() { - facadeClasses_["Simple"] = Teuchos::rcp(new FacadeSimple()); - facadeClasses_["BGS2x2"] = Teuchos::rcp(new FacadeBGS2x2()); - } - +template +FacadeClassFactory::FacadeClassFactory() { + facadeClasses_["Simple"] = Teuchos::rcp(new FacadeSimple()); + facadeClasses_["BGS2x2"] = Teuchos::rcp(new FacadeBGS2x2()); +} - template - Teuchos::RCP FacadeClassFactory::SetParameterList(const ParameterList& paramList) { +template +Teuchos::RCP FacadeClassFactory::SetParameterList(const ParameterList& paramList) { + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("MueLu preconditioner") == false, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("MueLu preconditioner") == false, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); + std::string precMueLu = paramList.get("MueLu preconditioner"); - std::string precMueLu = paramList.get("MueLu preconditioner"); - - // could not find requested facade class - if(facadeClasses_.find(precMueLu) == facadeClasses_.end()) { - GetOStream(Errors) << "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"!" << std::endl; - GetOStream(Errors) << "The available facade classes are:" << std::endl; - for(typename std::map >::const_iterator it =facadeClasses_.begin(); it != facadeClasses_.end(); it++){ - GetOStream(Errors) << " " << it->first << std::endl; - } - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"."); + // could not find requested facade class + if (facadeClasses_.find(precMueLu) == facadeClasses_.end()) { + GetOStream(Errors) << "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"!" 
<< std::endl; + GetOStream(Errors) << "The available facade classes are:" << std::endl; + for (typename std::map >::const_iterator it = facadeClasses_.begin(); it != facadeClasses_.end(); it++) { + GetOStream(Errors) << " " << it->first << std::endl; } - - return facadeClasses_[precMueLu]->SetParameterList(paramList); + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"."); } -} // end namespace MueLu + return facadeClasses_[precMueLu]->SetParameterList(paramList); +} + +} // end namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_decl.hpp index c36c4a4ef283..34ea6f4de46e 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_decl.hpp @@ -55,37 +55,34 @@ namespace MueLu { - template - class FacadeSimple : public FacadeClassBase { +template +class FacadeSimple : public FacadeClassBase { #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeSimple(); + //! Constructor. + FacadeSimple(); - //! Destructor. - virtual ~FacadeSimple() { } + //! Destructor. + virtual ~FacadeSimple() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass interpreter. + /*! @brief Set parameter list for FacadeClass interpreter. - @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. - */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - - private: - - }; - -} // namespace MueLu + @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. 
+ */ + Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + private: +}; +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_def.hpp index 6c49ed4bdd64..b9dba089af29 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeSimple_def.hpp @@ -50,355 +50,349 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeSimple_decl.hpp" namespace MueLu { - template - FacadeSimple::FacadeSimple() { - } - - - template - Teuchos::RCP FacadeSimple::SetParameterList(const ParameterList& paramList) { - - // obtain ParameterList with default input parameters for this facade class - // Note all parameters are of type string (we use it for string replacement) - std::string defaultString = -"" -"" -"" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" -; - Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); - // validate user input parameters (and set defaults if necessary) - Teuchos::ParameterList inputParameters = paramList; - inputParameters.validateParametersAndSetDefaults(*defaultList); - TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeSimple: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); - - // create copy of template string which is updated with in-place string replacements - // template string for preconditioner layout (factory based parameters) - std::string finalString = +template +FacadeSimple::FacadeSimple() { +} -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -"" -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" - ; +template +Teuchos::RCP FacadeSimple::SetParameterList(const ParameterList& paramList) { + // obtain ParameterList with default input parameters for this facade class + // Note all parameters are of type string (we use it for string replacement) + std::string defaultString = + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""; + Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); + // validate user input parameters (and set defaults if necessary) + Teuchos::ParameterList inputParameters = paramList; + 
inputParameters.validateParametersAndSetDefaults(*defaultList); + TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeSimple: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - // logical code for more complicated distinctions + // create copy of template string which is updated with in-place string replacements + // template string for preconditioner layout (factory based parameters) + std::string finalString = + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + "" + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + ""; - std::string smoother1 = inputParameters.get("Block 1: smoother"); - if(smoother1 == "ILU") { - this->ReplaceString(finalString, 
"XYZSmoother1XYZ", "mySmooILUFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." << std::endl; - } + // logical code for more complicated distinctions - std::string smoother2 = inputParameters.get("Block 2: smoother"); - if(smoother2 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", 
"mySmooDirectFact2"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." << std::endl; - } + std::string smoother1 = inputParameters.get("Block 1: smoother"); + if (smoother1 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - if(inputParameters.get("Block 1: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); - } else { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); - } - if(inputParameters.get("Block 2: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); - } else { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); - } + std::string smoother2 = inputParameters.get("Block 2: smoother"); + if (smoother2 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - // end logical code + if (inputParameters.get("Block 1: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); + } else { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); + } + if (inputParameters.get("Block 2: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); + } else { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); + } - // loop over all input parameters - for(Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { - // form replacement string - std::string par_name = inputParameters.name(it); - std::stringstream ss; - ss << "XXX" << par_name << "YYY"; + // end logical code - // update final string with parameters - Teuchos::ParameterEntry par_entry = inputParameters.entry(it); - this->ReplaceString(finalString, - ss.str(), Teuchos::toString(par_entry.getAny())); - } + // loop over all input parameters + for (Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { + // form replacement string + std::string par_name = inputParameters.name(it); + std::stringstream ss; + ss << "XXX" << par_name << "YYY"; - Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); - return ret; + // update final string with parameters + Teuchos::ParameterEntry par_entry = inputParameters.entry(it); + this->ReplaceString(finalString, + ss.str(), Teuchos::toString(par_entry.getAny())); } -} // end namespace MueLu -#endif // 
PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ + Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); + return ret; +} + +} // end namespace MueLu +#endif // PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ diff --git a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp index 308bb878ca53..76edf912232d 100644 --- a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp @@ -37,171 +37,169 @@ namespace MueLu { - /* - Utility that from an existing Teuchos::ParameterList creates a new list, in - which level-specific parameters are replaced with sublists. - - Currently, level-specific parameters that begin with "smoother:" - or "aggregation:" are placed in sublists. Coarse options are also placed - in a coarse list. - - Example: - Input: - smoother: type (level 0) = symmetric Gauss-Seidel - smoother: sweeps (level 0) = 1 - Output: - smoother: list (level 0) -> - smoother: type = symmetric Gauss-Seidel - smoother: sweeps = 1 - */ - // This function is a copy of ML_CreateSublists to avoid dependency on ML - // Throw exception on error instead of exit() - //void CreateSublists(const ParameterList &List, ParameterList &newList); - - - /*! - @class AdaptiveSAMLParameterListInterpreter class. - @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. - This interpreter uses the same default values as ML. This allows to compare ML/MueLu results - */ - - template - class AdaptiveSaMLParameterListInterpreter : - public HierarchyManager { +/* + Utility that from an existing Teuchos::ParameterList creates a new list, in + which level-specific parameters are replaced with sublists. + + Currently, level-specific parameters that begin with "smoother:" + or "aggregation:" are placed in sublists. 
Coarse options are also placed + in a coarse list. + + Example: + Input: + smoother: type (level 0) = symmetric Gauss-Seidel + smoother: sweeps (level 0) = 1 + Output: + smoother: list (level 0) -> + smoother: type = symmetric Gauss-Seidel + smoother: sweeps = 1 +*/ +// This function is a copy of ML_CreateSublists to avoid dependency on ML +// Throw exception on error instead of exit() +// void CreateSublists(const ParameterList &List, ParameterList &newList); + +/*! + @class AdaptiveSAMLParameterListInterpreter class. + @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. + This interpreter uses the same default values as ML. This allows to compare ML/MueLu results +*/ + +template +class AdaptiveSaMLParameterListInterpreter : public HierarchyManager { #undef MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AdaptiveSaMLParameterListInterpreter() : nullspace_(NULL), blksize_(1) { } - - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param nspVector: MultiVector with fine-level nullspace approximation - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList & paramList,std::vector > factoryList = std::vector >(0)); + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! 
The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - AdaptiveSaMLParameterListInterpreter(const std::string & xmlFileName,std::vector > factoryList = std::vector >(0)); + //! Constructor. + AdaptiveSaMLParameterListInterpreter() + : nullspace_(NULL) + , blksize_(1) {} - //! Destructor. - virtual ~AdaptiveSaMLParameterListInterpreter() { } + //! Constructor. + //! @param paramList: parameter list with ML parameters + //! @param nspVector: MultiVector with fine-level nullspace approximation + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList& paramList, std::vector > factoryList = std::vector >(0)); - //@} + //! Constructor. + //! @param xmlFileName: file name for XML file with ML parameters + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + AdaptiveSaMLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList = std::vector >(0)); - //@{ + //! Destructor. 
+ virtual ~AdaptiveSaMLParameterListInterpreter() {} - void SetParameterList(const Teuchos::ParameterList & paramList); + //@} - //@} + //@{ - //@{ + void SetParameterList(const Teuchos::ParameterList& paramList); - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const; + //@} - //@} + //@{ - //@{ + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const; - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. + //@} - This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able - to add some factories that write out some debug information etc. which are not handled by the ML - Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - */ - void AddTransferFactory(const RCP & factory); + //@{ - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} + //! @name Handling of additional user-specific transfer factories + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. - private: - - //! build multigrid hierarchy for improving nullspace - //! use ML settings that are also used for the final full multigrid - //! hierarchy. In contrary to the final multigrid hierarchy use - //! only nonsmoothed transfer operators (safe time of prolongator smoothing) - //! and cheap level smoothers (no direct solver on coarsest level). - void SetupInitHierarchy(Hierarchy & H) const; - - //! 
internal routine to add a new factory manager used for the initialization phase - void AddInitFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { - const int lastLevel = startLevel + numDesiredLevel - 1; - if (init_levelManagers_.size() < lastLevel + 1) init_levelManagers_.resize(lastLevel + 1); - - for(int iLevel = startLevel; iLevel <= lastLevel; iLevel++) { - init_levelManagers_[iLevel] = manager; - } + This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able + to add some factories that write out some debug information etc. which are not handled by the ML + Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + */ + void AddTransferFactory(const RCP& factory); + + //! Returns number of transfer factories. + size_t NumTransferFactories() const; + //@} + + private: + //! build multigrid hierarchy for improving nullspace + //! use ML settings that are also used for the final full multigrid + //! hierarchy. In contrary to the final multigrid hierarchy use + //! only nonsmoothed transfer operators (safe time of prolongator smoothing) + //! and cheap level smoothers (no direct solver on coarsest level). + void SetupInitHierarchy(Hierarchy& H) const; + + //! internal routine to add a new factory manager used for the initialization phase + void AddInitFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { + const int lastLevel = startLevel + numDesiredLevel - 1; + if (init_levelManagers_.size() < lastLevel + 1) init_levelManagers_.resize(lastLevel + 1); + + for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) { + init_levelManagers_[iLevel] = manager; } + } - //! Used in SetupInitHierarchy() to access levelManagers_ - //! Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() - Teuchos::RCP InitLvlMngr(int levelID, int lastLevelID) const { + //! Used in SetupInitHierarchy() to access levelManagers_ + //! 
Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() + Teuchos::RCP InitLvlMngr(int levelID, int lastLevelID) const { + // Please not that the order of the 'if' statements is important. - // Please not that the order of the 'if' statements is important. + if (levelID == -1) return Teuchos::null; // when this routine is called with levelID == '-1', it means that we are processing the finest Level (there is no finer level) + if (levelID == lastLevelID + 1) return Teuchos::null; // when this routine is called with levelID == 'lastLevelID+1', it means that we are processing the last level (ie: there is no nextLevel...) - if (levelID == -1) return Teuchos::null; // when this routine is called with levelID == '-1', it means that we are processing the finest Level (there is no finer level) - if (levelID == lastLevelID+1) return Teuchos::null; // when this routine is called with levelID == 'lastLevelID+1', it means that we are processing the last level (ie: there is no nextLevel...) - - if (0 == init_levelManagers_.size()) { // default factory manager. - // the default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager. - static RCP defaultMngr = rcp(new FactoryManager()); - return defaultMngr; - } - if (levelID >= init_levelManagers_.size()) return init_levelManagers_[init_levelManagers_.size()-1]; // last levelManager is used for all the remaining levels. - - return init_levelManagers_[levelID]; // throw exception if out of bound. + if (0 == init_levelManagers_.size()) { // default factory manager. + // the default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager. + static RCP defaultMngr = rcp(new FactoryManager()); + return defaultMngr; } + if (levelID >= init_levelManagers_.size()) return init_levelManagers_[init_levelManagers_.size() - 1]; // last levelManager is used for all the remaining levels. 
+ + return init_levelManagers_[levelID]; // throw exception if out of bound. + } - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; + //! nullspace can be embedded in the ML parameter list + int nullspaceDim_; + double* nullspace_; - //! export aggregates - bool bExportAggregates_; //!< if set to true an AggregationExportFactory is used to export aggregation information (default = false) + //! export aggregates + bool bExportAggregates_; //!< if set to true an AggregationExportFactory is used to export aggregation information (default = false) - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; + //! list of user-defined transfer Factories + //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) + //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the + //! capabibilities of ML. + std::vector > TransferFacts_; - //! list of levelManagers for adaptive smoothed aggregation - //! initialization phase - Array > init_levelManagers_; + //! list of levelManagers for adaptive smoothed aggregation + //! initialization phase + Array > init_levelManagers_; - //@{ Operator configuration + //@{ Operator configuration - //! Setup Operator object - //! overloaded from HierarchyManager to set nDofsPerNode - virtual void SetupOperator(Operator & Op) const; + //! Setup Operator object + //! overloaded from HierarchyManager to set nDofsPerNode + virtual void SetupOperator(Operator& Op) const; - //! Matrix configuration storage - int blksize_; - //@} + //! 
Matrix configuration storage + int blksize_; + //@} - }; // class AdaptiveSaMLParameterListInterpreter +}; // class AdaptiveSaMLParameterListInterpreter -} // namespace MueLu +} // namespace MueLu #define MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_ADAPTIVESAMLPARAMTERLISTINTERPRETER_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp index 93d73cb53c5c..f18873a234d1 100644 --- a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp @@ -54,404 +54,404 @@ // Read a parameter value from a parameter list and store it into a variable named 'varName' #define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); + varType varName = defaultValue; \ + if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); // Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) #define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else outParamList.set(outParamStr, defaultValue); \ + if (paramList.isParameter(paramStr)) \ + outParamList.set(outParamStr, paramList.get(paramStr)); \ + else \ + outParamList.set(outParamStr, defaultValue); namespace MueLu { - namespace AdaptiveDetails { - template - Teuchos::RCP > getSmoother(Teuchos::ParameterList & list) { - using SF = MueLu::SmootherFactory; - ParameterListInterpreter interpreter(list); - return Teuchos::rcp_const_cast(Teuchos::rcp_dynamic_cast(interpreter.GetFactoryManager(0)->GetFactory("Smoother"))); - } +namespace AdaptiveDetails { 
+template +Teuchos::RCP > getSmoother(Teuchos::ParameterList& list) { + using SF = MueLu::SmootherFactory; + ParameterListInterpreter interpreter(list); + return Teuchos::rcp_const_cast(Teuchos::rcp_dynamic_cast(interpreter.GetFactoryManager(0)->GetFactory("Smoother"))); +} +} // namespace AdaptiveDetails + +template +AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList& paramList, std::vector > factoryList) + : TransferFacts_(factoryList) + , blksize_(1) { + SetParameterList(paramList); +} + +template +AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList) + : nullspace_(NULL) + , TransferFacts_(factoryList) + , blksize_(1) { + Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); + SetParameterList(*paramList); +} + +template +void AdaptiveSaMLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList& paramList_in) { + Teuchos::ParameterList paramList = paramList_in; + + RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) + + // + // Read top-level of the parameter list + // + + // hard-coded default values == ML defaults according to the manual + MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); + MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); + MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + + MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + + MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); + // MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); + MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4 / (double)3, agg_damping); + // MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); + MUELU_READ_PARAM(paramList, "aggregation: nodes per 
aggregate", int, 1, minPerAgg); + + MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); + MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation + MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + + MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: + // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + + // std::cout << std::endl << "Parameter list after CreateSublists" << std::endl; + // std::cout << paramListWithSubList << std::endl; + + int maxNbrAlreadySelected = 0; + + // Matrix option + this->blksize_ = nDofsPerNode; + + // Translate verbosity parameter + Teuchos::EVerbosityLevel eVerbLevel = Teuchos::VERB_NONE; + if (verbosityLevel == 0) eVerbLevel = Teuchos::VERB_NONE; + if (verbosityLevel > 0) eVerbLevel = Teuchos::VERB_LOW; + if (verbosityLevel > 4) eVerbLevel = Teuchos::VERB_MEDIUM; + if (verbosityLevel > 7) eVerbLevel = Teuchos::VERB_HIGH; + if (verbosityLevel > 9) eVerbLevel = Teuchos::VERB_EXTREME; + + TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); + + // Create MueLu factories + // RCP nspFact = rcp(new NullspaceFactory()); + RCP dropFact = rcp(new CoalesceDropFactory()); + // dropFact->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); + + // Uncoupled aggregation + RCP AggFact = rcp(new 
UncoupledAggregationFactory()); + AggFact->SetMinNodesPerAggregate(minPerAgg); // TODO should increase if run anything other than 1D + AggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected); + AggFact->SetOrdering("natural"); + + if (verbosityLevel > 3) { // TODO fix me: Setup is a static function: we cannot use GetOStream without an object... + *out << "========================= Aggregate option summary =========================" << std::endl; + *out << "min Nodes per aggregate : " << minPerAgg << std::endl; + *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; + *out << "aggregate ordering : natural" << std::endl; + *out << "=============================================================================" << std::endl; } - template - AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList & paramList, std::vector > factoryList) : TransferFacts_(factoryList), blksize_(1) { - SetParameterList(paramList); + RCP PFact; + RCP RFact; + RCP PtentFact = rcp(new TentativePFactory()); + if (agg_damping == 0.0 && bEnergyMinimization == false) { + // tentative prolongation operator (PA-AMG) + PFact = PtentFact; + RFact = rcp(new TransPFactory()); + } else if (agg_damping != 0.0 && bEnergyMinimization == false) { + // smoothed aggregation (SA-AMG) + RCP SaPFact = rcp(new SaPFactory()); + SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); + PFact = SaPFact; + RFact = rcp(new TransPFactory()); + } else if (bEnergyMinimization == true) { + // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) + PFact = rcp(new PgPFactory()); + RFact = rcp(new GenericRFactory()); } - template - AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(const std::string & xmlFileName, std::vector > factoryList) : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - 
SetParameterList(*paramList); + RCP AcFact = rcp(new RAPFactory()); + for (size_t i = 0; i < TransferFacts_.size(); i++) { + AcFact->AddTransferFactory(TransferFacts_[i]); // THIS WILL BE REPLACED with a call to the MLParamterListInterpreter } - template - void AdaptiveSaMLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList & paramList_in) { - Teuchos::ParameterList paramList = paramList_in; + // + // Nullspace factory + // - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) + // Set fine level nullspace + // extract pre-computed nullspace from ML parameter list + // store it in nullspace_ and nullspaceDim_ + if (nullspaceType != "default vectors") { + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). 
You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - // - // Read top-level of the parameter list - // + nullspaceDim_ = nullspaceDim; + nullspace_ = nullspaceVec; + } - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory()); + nspFact->SetFactory("Nullspace", PtentFact); + + // + // Hierarchy + FactoryManager + // + + // Hierarchy options + this->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); + this->numDesiredLevel_ = maxLevels; + this->maxCoarseSize_ = maxCoarseSize; + + // init smoother + RCP initSmootherFact = Teuchos::null; + if (paramList.isSublist("init smoother")) { + ParameterList& initList = paramList.sublist("init smoother"); // TODO move this before for loop + initSmootherFact = AdaptiveDetails::getSmoother(initList); + } else { + std::string ifpackType = "RELAXATION"; + Teuchos::ParameterList smootherParamList; + smootherParamList.set("relaxation: type", "symmetric Gauss-Seidel"); + smootherParamList.set("smoother: sweeps", 1); + smootherParamList.set("smoother: damping factor", 1.0); + RCP smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + + initSmootherFact = rcp(new SmootherFactory()); + initSmootherFact->SetSmootherPrototypes(smooProto, smooProto); + } - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + // + // Coarse Smoother + // + ParameterList& coarseList = paramList.sublist("coarse: list"); + // coarseList.get("smoother: type", "Amesos-KLU"); // set default + RCP coarseFact = AdaptiveDetails::getSmoother(coarseList); - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - 
MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4/(double)3, agg_damping); - //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); + // Smoothers Top Level Parameters - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); + // std::cout << std::endl << "Top level smoother parameters:" << std::endl; + // std::cout << *topLevelSmootherParam << std::endl; - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + // + // Prepare factory managers + // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList + for (int levelID = 0; levelID < maxLevels; levelID++) { // - // Move smoothers/aggregation/coarse parameters to sublists + // Level FactoryManager // - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // std::cout << std::endl << "Parameter list after CreateSublists" << std::endl; - // std::cout << paramListWithSubList << std::endl; - - int maxNbrAlreadySelected = 0; - - // Matrix option - this->blksize_ = nDofsPerNode; - - // Translate verbosity parameter - Teuchos::EVerbosityLevel eVerbLevel = Teuchos::VERB_NONE; - if (verbosityLevel == 0) eVerbLevel = Teuchos::VERB_NONE; - if 
(verbosityLevel > 0) eVerbLevel = Teuchos::VERB_LOW; - if (verbosityLevel > 4) eVerbLevel = Teuchos::VERB_MEDIUM; - if (verbosityLevel > 7) eVerbLevel = Teuchos::VERB_HIGH; - if (verbosityLevel > 9) eVerbLevel = Teuchos::VERB_EXTREME; - - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); - - // Create MueLu factories - // RCP nspFact = rcp(new NullspaceFactory()); - RCP dropFact = rcp(new CoalesceDropFactory()); - //dropFact->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); - - // Uncoupled aggregation - RCP AggFact = rcp(new UncoupledAggregationFactory()); - AggFact->SetMinNodesPerAggregate(minPerAgg); //TODO should increase if run anything other than 1D - AggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected); - AggFact->SetOrdering("natural"); - - if (verbosityLevel > 3) { // TODO fix me: Setup is a static function: we cannot use GetOStream without an object... 
- *out << "========================= Aggregate option summary =========================" << std::endl; - *out << "min Nodes per aggregate : " << minPerAgg << std::endl; - *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - *out << "aggregate ordering : natural" << std::endl; - *out << "=============================================================================" << std::endl; - } - - RCP PFact; - RCP RFact; - RCP PtentFact = rcp( new TentativePFactory() ); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp( new TransPFactory() ); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact = rcp( new SaPFactory() ); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp( new TransPFactory() ); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp( new PgPFactory() ); - RFact = rcp( new GenericRFactory() ); - } - - RCP AcFact = rcp( new RAPFactory() ); - for (size_t i = 0; iAddTransferFactory(TransferFacts_[i]); // THIS WILL BE REPLACED with a call to the MLParamterListInterpreter - } + RCP manager = rcp(new FactoryManager()); + RCP initmanager = rcp(new FactoryManager()); // - // Nullspace factory + // Smoothers // - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). 
error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory()); - nspFact->SetFactory("Nullspace", PtentFact); - - // - // Hierarchy + FactoryManager - // - - // Hierarchy options - this->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; - - // init smoother - RCP initSmootherFact = Teuchos::null; - if(paramList.isSublist("init smoother")) { - ParameterList& initList = paramList.sublist("init smoother"); // TODO move this before for loop - initSmootherFact = AdaptiveDetails::getSmoother(initList); - } else { - std::string ifpackType = "RELAXATION"; - Teuchos::ParameterList smootherParamList; - smootherParamList.set("relaxation: type", "symmetric Gauss-Seidel"); - smootherParamList.set("smoother: sweeps", 1); - smootherParamList.set("smoother: damping factor", 1.0); - RCP smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - - initSmootherFact = rcp( new SmootherFactory() ); - initSmootherFact->SetSmootherPrototypes(smooProto, smooProto); + { + // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. 
+ // TODO: unit-test this part alone + + ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy + MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ + // std::cout << std::endl << "Merged List for level " << levelID << std::endl; + // std::cout << levelSmootherParam << std::endl; + + // RCP smootherFact = this->GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. + RCP smootherFact = AdaptiveDetails::getSmoother(levelSmootherParam); + manager->SetFactory("Smoother", smootherFact); + smootherFact->DisableMultipleCallCheck(); + + initmanager->SetFactory("Smoother", initSmootherFact); + initmanager->SetFactory("CoarseSolver", initSmootherFact); + initSmootherFact->DisableMultipleCallCheck(); } // - // Coarse Smoother - // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // coarseList.get("smoother: type", "Amesos-KLU"); // set default - RCP coarseFact = AdaptiveDetails::getSmoother(coarseList); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - // std::cout << std::endl << "Top level smoother parameters:" << std::endl; - // std::cout << *topLevelSmootherParam << std::endl; - + // Misc // - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID=0; levelID < maxLevels; levelID++) { - - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - RCP initmanager = rcp(new FactoryManager()); - - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. 
- // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - //RCP smootherFact = this->GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. - RCP smootherFact = AdaptiveDetails::getSmoother(levelSmootherParam); - manager->SetFactory("Smoother", smootherFact); - smootherFact->DisableMultipleCallCheck(); - - initmanager->SetFactory("Smoother", initSmootherFact); - initmanager->SetFactory("CoarseSolver", initSmootherFact); - initSmootherFact->DisableMultipleCallCheck(); - - } - - // - // Misc - // - - Teuchos::rcp_dynamic_cast(PFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(PtentFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(RFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(coarseFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(dropFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(AggFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(AcFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(nspFact)->DisableMultipleCallCheck(); - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("A", AcFact); - manager->SetFactory("P", PFact); - manager->SetFactory("Ptent", PtentFact); - manager->SetFactory("R", RFact); - manager->SetFactory("Nullspace", nspFact); - - //initmanager->SetFactory("CoarseSolver", coarseFact); - initmanager->SetFactory("Graph", dropFact); - initmanager->SetFactory("Aggregates", 
AggFact); - initmanager->SetFactory("DofsPerNode", dropFact); - initmanager->SetFactory("A", AcFact); - initmanager->SetFactory("P", PtentFact); // use nonsmoothed transfers - initmanager->SetFactory("Ptent", PtentFact); - initmanager->SetFactory("R", RFact); - initmanager->SetFactory("Nullspace", nspFact); - - this->AddFactoryManager(levelID, 1, manager); - this->AddInitFactoryManager(levelID, 1, initmanager); - } // for (level loop) - } - - template - void AdaptiveSaMLParameterListInterpreter::SetupInitHierarchy(Hierarchy & H) const { - TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); - - RCP l = H.GetLevel(0); - RCP Op = l->Get >("A"); - SetupOperator(*Op); // use overloaded SetupMatrix routine - this->SetupExtra(H); - - // Setup Hierarchy - H.SetMaxCoarseSize(this->maxCoarseSize_); // TODO - - int levelID = 0; - int lastLevelID = this->numDesiredLevel_ - 1; - bool isLastLevel = false; - - while(!isLastLevel) { - bool r = H.Setup(levelID, - InitLvlMngr(levelID-1, lastLevelID), - InitLvlMngr(levelID, lastLevelID), - InitLvlMngr(levelID+1, lastLevelID)); - - isLastLevel = r || (levelID == lastLevelID); - levelID++; - } + Teuchos::rcp_dynamic_cast(PFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(PtentFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(RFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(coarseFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(dropFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(AggFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(AcFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(nspFact)->DisableMultipleCallCheck(); + + manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop + manager->SetFactory("Graph", dropFact); + manager->SetFactory("Aggregates", AggFact); + manager->SetFactory("DofsPerNode", dropFact); + manager->SetFactory("A", AcFact); + 
manager->SetFactory("P", PFact); + manager->SetFactory("Ptent", PtentFact); + manager->SetFactory("R", RFact); + manager->SetFactory("Nullspace", nspFact); + + // initmanager->SetFactory("CoarseSolver", coarseFact); + initmanager->SetFactory("Graph", dropFact); + initmanager->SetFactory("Aggregates", AggFact); + initmanager->SetFactory("DofsPerNode", dropFact); + initmanager->SetFactory("A", AcFact); + initmanager->SetFactory("P", PtentFact); // use nonsmoothed transfers + initmanager->SetFactory("Ptent", PtentFact); + initmanager->SetFactory("R", RFact); + initmanager->SetFactory("Nullspace", nspFact); + + this->AddFactoryManager(levelID, 1, manager); + this->AddInitFactoryManager(levelID, 1, initmanager); + } // for (level loop) +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupInitHierarchy(Hierarchy& H) const { + TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); + + RCP l = H.GetLevel(0); + RCP Op = l->Get >("A"); + SetupOperator(*Op); // use overloaded SetupMatrix routine + this->SetupExtra(H); + + // Setup Hierarchy + H.SetMaxCoarseSize(this->maxCoarseSize_); // TODO + + int levelID = 0; + int lastLevelID = this->numDesiredLevel_ - 1; + bool isLastLevel = false; + + while (!isLastLevel) { + bool r = H.Setup(levelID, + InitLvlMngr(levelID - 1, lastLevelID), + InitLvlMngr(levelID, lastLevelID), + InitLvlMngr(levelID + 1, lastLevelID)); + + isLastLevel = r || (levelID == lastLevelID); + levelID++; } - - template - void AdaptiveSaMLParameterListInterpreter::SetupHierarchy(Hierarchy & H) const { - - // set fine level null space - // usually this null space is provided from outside (by the user) using - // the ML parameter lists. 
- if (this->nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - const RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for ( size_t i=0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i*myLength + j]; - } +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { + // set fine level null space + // usually this null space is provided from outside (by the user) using + // the ML parameter lists. + if (this->nullspace_ != NULL) { + RCP fineLevel = H.GetLevel(0); + const RCP rowMap = fineLevel->Get >("A")->getRowMap(); + RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); + + for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { + Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); + const size_t myLength = nullspace->getLocalLength(); + + for (size_t j = 0; j < myLength; j++) { + nullspacei[j] = nullspace_[i * myLength + j]; } - - fineLevel->Set("Nullspace", nullspace); } - // keep aggregates - H.Keep("Aggregates", HierarchyManager::GetFactoryManager(0)->GetFactory("Aggregates").get()); - - /////////////////////////////// - - // build hierarchy for initialization - SetupInitHierarchy(H); + fineLevel->Set("Nullspace", nullspace); + } - { - // do some iterations with the built hierarchy to improve the null space - Teuchos::RCP Finest = H.GetLevel(0); // get finest level,MueLu::NoFactory::get() - Teuchos::RCP nspVector2 = Finest->Get >("Nullspace"); + // keep aggregates + H.Keep("Aggregates", HierarchyManager::GetFactoryManager(0)->GetFactory("Aggregates").get()); - Xpetra::IO::Write("orig_nsp.vec", *nspVector2); + /////////////////////////////// - RCP Op = Finest->Get >("A"); - Xpetra::IO::Write("A.mat", *Op); + // build hierarchy 
for initialization + SetupInitHierarchy(H); + { + // do some iterations with the built hierarchy to improve the null space + Teuchos::RCP Finest = H.GetLevel(0); // get finest level,MueLu::NoFactory::get() + Teuchos::RCP nspVector2 = Finest->Get >("Nullspace"); - Teuchos::RCP homogRhsVec = MultiVectorFactory::Build(nspVector2->getMap(),nspVector2->getNumVectors(),true); - homogRhsVec->putScalar(0.0); + Xpetra::IO::Write("orig_nsp.vec", *nspVector2); - // do 1 multigrid cycle for improving the null space by "solving" - // A B_f = 0 - // where A is the system matrix and B_f the fine level null space vectors - H.Iterate(*homogRhsVec, *nspVector2, 1, false); + RCP Op = Finest->Get >("A"); + Xpetra::IO::Write("A.mat", *Op); - // store improved fine level null space - Finest->Set("Nullspace",nspVector2); + Teuchos::RCP homogRhsVec = MultiVectorFactory::Build(nspVector2->getMap(), nspVector2->getNumVectors(), true); + homogRhsVec->putScalar(0.0); - Xpetra::IO::Write("new_nsp.vec", *nspVector2); + // do 1 multigrid cycle for improving the null space by "solving" + // A B_f = 0 + // where A is the system matrix and B_f the fine level null space vectors + H.Iterate(*homogRhsVec, *nspVector2, 1, false); - //H.Delete("CoarseSolver", init_levelManagers_[0]->GetFactory("CoarseSolver").get()); - } + // store improved fine level null space + Finest->Set("Nullspace", nspVector2); - { - // do some clean up. - // remove all old default factories. Build new ones for the second build. - // this is a little bit tricky to understand - for(size_t k=0; k < HierarchyManager::getNumFactoryManagers(); k++) { - HierarchyManager::GetFactoryManager(k)->Clean(); - //Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(k)->GetFactory("Smoother"))->DisableMultipleCallCheck(); // after changing to MLParamterListInterpreter functions - } - // not sure about this. 
i only need it if Smoother is defined explicitely (not using default smoother) - // need this: otherwise RAPFactory::Build is complaining on level 0 - // and TentativePFactory::Build is complaining on level 1 - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(0)->GetFactory("A"))->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("P"))->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("Ptent"))->DisableMultipleCallCheck(); - - HierarchyManager::SetupHierarchy(H); - } + Xpetra::IO::Write("new_nsp.vec", *nspVector2); + // H.Delete("CoarseSolver", init_levelManagers_[0]->GetFactory("CoarseSolver").get()); } - template - void AdaptiveSaMLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); - } - - template - size_t AdaptiveSaMLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); - } - - template - void AdaptiveSaMLParameterListInterpreter::SetupOperator(Operator & Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; - - A.SetFixedBlockSize(blksize_); - - } catch (std::bad_cast& e) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; + { + // do some clean up. + // remove all old default factories. 
Build new ones for the second build. + // this is a little bit tricky to understand + for (size_t k = 0; k < HierarchyManager::getNumFactoryManagers(); k++) { + HierarchyManager::GetFactoryManager(k)->Clean(); + // Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(k)->GetFactory("Smoother"))->DisableMultipleCallCheck(); // after changing to MLParamterListInterpreter functions } + // not sure about this. i only need it if Smoother is defined explicitely (not using default smoother) + // need this: otherwise RAPFactory::Build is complaining on level 0 + // and TentativePFactory::Build is complaining on level 1 + Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(0)->GetFactory("A"))->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("P"))->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("Ptent"))->DisableMultipleCallCheck(); + + HierarchyManager::SetupHierarchy(H); } +} + +template +void AdaptiveSaMLParameterListInterpreter::AddTransferFactory(const RCP& factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. 
Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); + TransferFacts_.push_back(factory); +} + +template +size_t AdaptiveSaMLParameterListInterpreter::NumTransferFactories() const { + return TransferFacts_.size(); +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupOperator(Operator& Op) const { + try { + Matrix& A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; + + A.SetFixedBlockSize(blksize_); + + } catch (std::bad_cast& e) { + this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; + } +} -} // namespace MueLu - +} // namespace MueLu #endif /* MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index 1832f0704030..8e60177cc07f 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -159,7 +159,6 @@ #include "MueLu_Zoltan2Interface.hpp" #include "MueLu_NodePartitionInterface.hpp" - #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos.hpp" #include "MueLu_NullspaceFactory_kokkos.hpp" @@ -187,777 +186,780 @@ namespace MueLu { - /*! class FactoryFactory +/*! 
class FactoryFactory - @brief Factory that can generate other factories from +@brief Factory that can generate other factories from - */ - template - class FactoryFactory : public BaseClass { +*/ +template +class FactoryFactory : public BaseClass { #undef MUELU_FACTORYFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::map > FactoryMap; // TODO: remove - typedef std::map > FactoryManagerMap; - - public: - - /// \brief: Interpret Factory parameter list and build new factory - /// - /// \param param [in]: ParameterEntry being either the parameter list containing the "factory" parameter declaring the factory type (e.g., "TrilinosSmoother") or being a plain Parameter containing the factory type as value - /// \param factoryMapIn [in]: FactoryMap containing a map between factory name (e.g., "smootherFact1") and corresponding factory of all previously defined factories - /// \param factoryManagersIn [in]: FactoryManagerMap containing a map between group names and Factory manager objects. Needed for factories with sub-factory managers. - /// - /// Parameter List Parsing: - /// --------- - /// - /// - /// or: - /// - /// - /// - /// ... 
- /// - /// - virtual RCP BuildFactory(const Teuchos::ParameterEntry& param, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // Find factory - std::string factoryName; - Teuchos::ParameterList paramList; - if (!param.isList()) { - factoryName = Teuchos::getValue(param); - } else { - paramList = Teuchos::getValue(param); - factoryName = paramList.get("factory"); - } + typedef std::map > FactoryMap; // TODO: remove + typedef std::map > FactoryManagerMap; + + public: + /// \brief: Interpret Factory parameter list and build new factory + /// + /// \param param [in]: ParameterEntry being either the parameter list containing the "factory" parameter declaring the factory type (e.g., "TrilinosSmoother") or being a plain Parameter containing the factory type as value + /// \param factoryMapIn [in]: FactoryMap containing a map between factory name (e.g., "smootherFact1") and corresponding factory of all previously defined factories + /// \param factoryManagersIn [in]: FactoryManagerMap containing a map between group names and Factory manager objects. Needed for factories with sub-factory managers. + /// + /// Parameter List Parsing: + /// --------- + /// + /// + /// or: + /// + /// + /// + /// ... + /// + /// + virtual RCP BuildFactory(const Teuchos::ParameterEntry& param, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // Find factory + std::string factoryName; + Teuchos::ParameterList paramList; + if (!param.isList()) { + factoryName = Teuchos::getValue(param); + } else { + paramList = Teuchos::getValue(param); + factoryName = paramList.get("factory"); + } - // TODO: see how Teko handles this (=> register factories). 
- if (factoryName == "AggregateQualityEstimateFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "AggregationExportFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "AmalgamationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedCoarseMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedRAPFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BrickAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ClassicalMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ClassicalPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CloneRepartitionInterface") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoarseMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoarseningVisualizationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoalesceDropFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SmooVecCoalesceDropFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ConstraintFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoordinatesTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "DirectSolver") return BuildDirectSolver (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "DropNegativeEntriesFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "EminPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "FilteredAFactory") return Build2 (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "FineLevelInputDataFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeneralGeometricPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ReplicatePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CombinePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GenericRFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeometricInterpolationPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "HybridAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InterfaceAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InterfaceMappingTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InverseApproximationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InitialBlockNumberFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "LineDetectionFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - // LocalOrdinalTransferFactory is a utility factory that can be used for multiple things, so there is no default - // if (factoryName == "LocalOrdinalTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MapTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatrixAnalysisFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MultiVectorTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NoFactory") return MueLu::NoFactory::getRCP(); - if (factoryName == "NoSmoother") return 
rcp(new SmootherFactory(Teuchos::null)); - if (factoryName == "NotayAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspaceFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspacePresmoothFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "PatternFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "PgPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SaPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RAPFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RAPShiftFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceAcFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RegionRFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RegionRFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ReorderBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RepartitionInterface") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ScaledNullspaceFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SegregatedAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SemiCoarsenPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "StructuredAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "StructuredLineDetectionFactory") return Build2 (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "SubBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TentativePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ToggleCoordinatesTransferFactory") return BuildToggleCoordinatesTransferFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TogglePFactory") return BuildTogglePFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TransPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RfromP_Or_TransP") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TrilinosSmoother") return BuildTrilinosSmoother (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UncoupledAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UnsmooshFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UserAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UserPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "VariableDofLaplacianFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ZeroSubBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoalesceDropFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspaceFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SaPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); 
- if (factoryName == "StructuredAggregationFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TentativePFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatrixFreeTentativePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UncoupledAggregationFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - - // Handle removed Kokkos factories - if (factoryName == "CoarseMapFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoordinatesTransferFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - - if (factoryName == "ZoltanInterface") { + // TODO: see how Teko handles this (=> register factories). + if (factoryName == "AggregateQualityEstimateFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "AggregationExportFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "AmalgamationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedCoarseMapFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedRAPFactory") return BuildRAPFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BrickAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ClassicalMapFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ClassicalPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CloneRepartitionInterface") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoarseMapFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoarseningVisualizationFactory") return Build2(paramList, factoryMapIn, 
factoryManagersIn); + if (factoryName == "CoalesceDropFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SmooVecCoalesceDropFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ConstraintFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoordinatesTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "DirectSolver") return BuildDirectSolver(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "DropNegativeEntriesFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "EminPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "FilteredAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "FineLevelInputDataFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GeneralGeometricPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ReplicatePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CombinePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GenericRFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GeometricInterpolationPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "HybridAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InterfaceAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InterfaceMappingTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InverseApproximationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InitialBlockNumberFactory") return 
Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "LineDetectionFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + // LocalOrdinalTransferFactory is a utility factory that can be used for multiple things, so there is no default + // if (factoryName == "LocalOrdinalTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MapTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MatrixAnalysisFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MultiVectorTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NoFactory") return MueLu::NoFactory::getRCP(); + if (factoryName == "NoSmoother") return rcp(new SmootherFactory(Teuchos::null)); + if (factoryName == "NotayAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspaceFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspacePresmoothFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "PatternFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "PgPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SaPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RAPFactory") return BuildRAPFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RAPShiftFactory") return BuildRAPFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceAcFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RegionRFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == 
"RegionRFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ReorderBlockAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RepartitionInterface") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ScaledNullspaceFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SegregatedAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SemiCoarsenPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "StructuredAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "StructuredLineDetectionFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SubBlockAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TentativePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ToggleCoordinatesTransferFactory") return BuildToggleCoordinatesTransferFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TogglePFactory") return BuildTogglePFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TransPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RfromP_Or_TransP") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TrilinosSmoother") return BuildTrilinosSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UncoupledAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UnsmooshFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UserAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UserPFactory") return Build2(paramList, factoryMapIn, 
factoryManagersIn); + if (factoryName == "VariableDofLaplacianFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ZeroSubBlockAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoalesceDropFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspaceFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SaPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "StructuredAggregationFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TentativePFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MatrixFreeTentativePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UncoupledAggregationFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + + // Handle removed Kokkos factories + if (factoryName == "CoarseMapFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoordinatesTransferFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + + if (factoryName == "ZoltanInterface") { #if defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a ZoltanInterface object: Zoltan is disabled: HAVE_MUELU_ZOLTAN && HAVE_MPI == false."); -#endif // HAVE_MUELU_ZOLTAN && HAVE_MPI - } - if (factoryName == 
"Zoltan2Interface") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a ZoltanInterface object: Zoltan is disabled: HAVE_MUELU_ZOLTAN && HAVE_MPI == false."); +#endif // HAVE_MUELU_ZOLTAN && HAVE_MPI + } + if (factoryName == "Zoltan2Interface") { #if defined(HAVE_MUELU_ZOLTAN2) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false."); -#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI - } - if (factoryName == "IsorropiaInterface") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false."); +#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI + } + if (factoryName == "IsorropiaInterface") { #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a IsorropiaInterface object: Isorropia is disabled: HAVE_MUELU_ISORROPIA && HAVE_MPI == false."); -#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI - } + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a IsorropiaInterface object: Isorropia is disabled: HAVE_MUELU_ISORROPIA && HAVE_MPI == false."); +#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI + } - if (factoryName == "NodePartitionInterface") { + if (factoryName == "NodePartitionInterface") { #if defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, 
factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a NodePartitionInterface object: HAVE_MPI == false."); -#endif // HAVE_MPI - } + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a NodePartitionInterface object: HAVE_MPI == false."); +#endif // HAVE_MPI + } - if (factoryName == "RepartitionFactory") { + if (factoryName == "RepartitionFactory") { #ifdef HAVE_MPI - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionFactory object: HAVE_MPI == false."); -#endif // HAVE_MPI - } - if (factoryName == "RepartitionHeuristicFactory") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionFactory object: HAVE_MPI == false."); +#endif // HAVE_MPI + } + if (factoryName == "RepartitionHeuristicFactory") { #ifdef HAVE_MPI - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionHeuristicFactory object: HAVE_MPI == false."); -#endif // HAVE_MPI - } - // Blocked factories - if (factoryName == "BlockedCoordinatesTransferFactory") return BuildBlockedCoordFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedDirectSolver") return BuildBlockedDirectSolver(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedGaussSeidelSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedJacobiSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, 
factoryManagersIn); - if (factoryName == "BlockedPFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BraessSarazinSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "IndefiniteBlockDiagonalSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SimpleSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SchurComplementFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockRestrictionFactory")return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockAcFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockInterpolationFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionHeuristicFactory object: HAVE_MPI == false."); +#endif // HAVE_MPI + } + // Blocked factories + if (factoryName == "BlockedCoordinatesTransferFactory") return BuildBlockedCoordFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedDirectSolver") return BuildBlockedDirectSolver(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedGaussSeidelSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedJacobiSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedPFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BraessSarazinSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "IndefiniteBlockDiagonalSmoother") return 
BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SimpleSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SchurComplementFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockRestrictionFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockAcFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockInterpolationFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); #ifdef HAVE_MPI - if (factoryName == "RepartitionBlockDiagonalFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RepartitionBlockDiagonalFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); #endif #ifdef HAVE_MUELU_TEKO - if (factoryName == "TekoSmoother") return BuildTekoSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TekoSmoother") return BuildTekoSmoother(paramList, factoryMapIn, factoryManagersIn); #endif - if (factoryName == "UzawaSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UzawaSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); // Matlab factories #ifdef HAVE_MUELU_MATLAB - if (factoryName == "TwoLevelMatlabFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SingleLevelMatlabFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatlabSmoother") return BuildMatlabSmoother (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TwoLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SingleLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == 
"MatlabSmoother") return BuildMatlabSmoother(paramList, factoryMapIn, factoryManagersIn); #endif #ifdef HAVE_MUELU_INTREPID2 - if (factoryName == "IntrepidPCoarsenFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "IntrepidPCoarsenFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); #endif - // Use a user defined factories (in node) - if (factoryMapIn.find(factoryName) != factoryMapIn.end()) { - TEUCHOS_TEST_FOR_EXCEPTION((param.isList() && (++paramList.begin() != paramList.end())), Exceptions::RuntimeError, - "MueLu::FactoryFactory: Error during the parsing of: " << std::endl << paramList << std::endl - << "'" << factoryName << "' is not a factory name but an existing instance of a factory." << std::endl - << "Extra parameters cannot be specified after the creation of the object." << std::endl << std::endl - << "Correct syntaxes includes:" << std::endl - << " " << std::endl - << "or" << std::endl - << " " << std::endl - ); - - return factoryMapIn.find(factoryName)->second; - } + // Use a user defined factories (in node) + if (factoryMapIn.find(factoryName) != factoryMapIn.end()) { + TEUCHOS_TEST_FOR_EXCEPTION((param.isList() && (++paramList.begin() != paramList.end())), Exceptions::RuntimeError, + "MueLu::FactoryFactory: Error during the parsing of: " << std::endl + << paramList << std::endl + << "'" << factoryName << "' is not a factory name but an existing instance of a factory." << std::endl + << "Extra parameters cannot be specified after the creation of the object." 
<< std::endl + << std::endl + << "Correct syntaxes includes:" << std::endl + << " " << std::endl + << "or" << std::endl + << " " << std::endl); + + return factoryMapIn.find(factoryName)->second; + } - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory: unknown factory name : " << factoryName); + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory: unknown factory name : " << factoryName); - TEUCHOS_UNREACHABLE_RETURN(Teuchos::null); - } + TEUCHOS_UNREACHABLE_RETURN(Teuchos::null); + } - // - // - // + // + // + // - // FOLLOWING FUNCTIONS SHOULD LIVE WITH THE CORRESPONDING CLASS + // FOLLOWING FUNCTIONS SHOULD LIVE WITH THE CORRESPONDING CLASS - // - // - // + // + // + // -#define arraysize(ar) (sizeof(ar) / sizeof(ar[0])) +#define arraysize(ar) (sizeof(ar) / sizeof(ar[0])) - template // T must implement the Factory interface - RCP Build(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory = rcp(new T()); + template // T must implement the Factory interface + RCP Build(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory = rcp(new T()); - const char* strarray[] = {"A", "P", "R", "Graph", "UnAmalgamationInfo", "Aggregates", "Nullspace", "TransferFactory", "DofsPerNode"}; - std::vector v(strarray, strarray + arraysize(strarray)); - for (size_t i = 0; i < v.size(); ++i) - if (paramList.isParameter(v[i])) - factory->SetFactory(v[i], BuildFactory(paramList.getEntry(v[i]), factoryMapIn, factoryManagersIn)); + const char* strarray[] = {"A", "P", "R", "Graph", "UnAmalgamationInfo", "Aggregates", "Nullspace", "TransferFactory", "DofsPerNode"}; + std::vector v(strarray, strarray + arraysize(strarray)); + for (size_t i = 0; i < v.size(); ++i) + if (paramList.isParameter(v[i])) + factory->SetFactory(v[i], BuildFactory(paramList.getEntry(v[i]), 
factoryMapIn, factoryManagersIn)); - return factory; - } + return factory; + } - template // T must implement the Factory interface - RCP Build2(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory = rcp(new T()); + template // T must implement the Factory interface + RCP Build2(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory = rcp(new T()); - ParameterList paramListWithFactories; + ParameterList paramListWithFactories; - // Read the RCP parameters of the class T - RCP validParamList = factory->GetValidParameterList(); // TODO check for Teuchos::null (no parameter list validation) - TEUCHOS_TEST_FOR_EXCEPTION(validParamList == Teuchos::null, Exceptions::RuntimeError, "FactoryFactory::Build2: default parameter list is null. Please fix this."); - for (ParameterList::ConstIterator param = validParamList->begin(); param != validParamList->end(); ++param) { - const std::string& pName = validParamList->name(param); + // Read the RCP parameters of the class T + RCP validParamList = factory->GetValidParameterList(); // TODO check for Teuchos::null (no parameter list validation) + TEUCHOS_TEST_FOR_EXCEPTION(validParamList == Teuchos::null, Exceptions::RuntimeError, "FactoryFactory::Build2: default parameter list is null. 
Please fix this."); + for (ParameterList::ConstIterator param = validParamList->begin(); param != validParamList->end(); ++param) { + const std::string& pName = validParamList->name(param); - if (!paramList.isParameter(pName)) { - // Ignore unknown parameters - continue; - } + if (!paramList.isParameter(pName)) { + // Ignore unknown parameters + continue; + } - if (validParamList->isType< RCP >(pName)) { - // Generate or get factory described by param - RCP generatingFact = BuildFactory(paramList.getEntry(pName), factoryMapIn, factoryManagersIn); - paramListWithFactories.set(pName, generatingFact); - } else if (validParamList->isType >(pName)) { - if (pName == "ParameterList") { - // NOTE: we cannot use - // subList = sublist(rcpFromRef(paramList), pName) - // here as that would result in sublist also being a reference to a temporary object. - // The resulting dereferencing in the corresponding factory would then segfault - RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList)), pName); - paramListWithFactories.set(pName, subList); - } - } else { - paramListWithFactories.setEntry(pName, paramList.getEntry(pName)); + if (validParamList->isType >(pName)) { + // Generate or get factory described by param + RCP generatingFact = BuildFactory(paramList.getEntry(pName), factoryMapIn, factoryManagersIn); + paramListWithFactories.set(pName, generatingFact); + } else if (validParamList->isType >(pName)) { + if (pName == "ParameterList") { + // NOTE: we cannot use + // subList = sublist(rcpFromRef(paramList), pName) + // here as that would result in sublist also being a reference to a temporary object. 
+ // The resulting dereferencing in the corresponding factory would then segfault + RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList)), pName); + paramListWithFactories.set(pName, subList); } + } else { + paramListWithFactories.setEntry(pName, paramList.getEntry(pName)); } - - // Configure the factory - factory->SetParameterList(paramListWithFactories); - - return factory; } - template // T must implement the Factory interface - RCP BuildRAPFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - if (paramList.isSublist("TransferFactories") == false) { - factory = Build2(paramList, factoryMapIn, factoryManagersIn); - - } else { - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - - paramListNonConst->remove("TransferFactories"); - - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - RCP p = BuildFactory(transferFactories->entry(param), factoryMapIn, factoryManagersIn); - factory->AddTransferFactory(p); - } - } + // Configure the factory + factory->SetParameterList(paramListWithFactories); - return factory; - } + return factory; + } - template // T must implement the Factory interface - RCP BuildTogglePFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - if (paramList.isSublist("TransferFactories") == false) { - //TODO put in an error message: the TogglePFactory needs a TransferFactories sublist! 
- factory = Build2(paramList, factoryMapIn, factoryManagersIn); + template // T must implement the Factory interface + RCP BuildRAPFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory; + if (paramList.isSublist("TransferFactories") == false) { + factory = Build2(paramList, factoryMapIn, factoryManagersIn); - } else { - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - - paramListNonConst->remove("TransferFactories"); - - // build TogglePFactory - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // count how many prolongation factories and how many coarse null space factories have been declared. - // the numbers must match! - int numProlongatorFactories = 0; - int numPtentFactories = 0; - int numCoarseNspFactories = 0; - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundNsp = transferFactories->name(param).find("Nullspace"); - if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length()==10) { - numCoarseNspFactories++; - continue; - } - size_t foundPtent = transferFactories->name(param).find("Ptent"); - if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length()==6) { - numPtentFactories++; - continue; - } - size_t foundP = transferFactories->name(param).find("P"); - if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length()==2) { - numProlongatorFactories++; - continue; - } - } - TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories!=numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of prolongator and coarse nullspace factories!"); - 
TEUCHOS_TEST_FOR_EXCEPTION(numPtentFactories!=numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of ptent and coarse nullspace factories!"); - TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories < 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The TogglePFactory needs at least two different prolongation operators. The factories have to be provided using the names P%i and Nullspace %i, where %i denotes a number between 1 and 9."); - - // create empty vectors with data - std::vector prolongatorFactoryNames(numProlongatorFactories); - std::vector coarseNspFactoryNames(numProlongatorFactories); - std::vector ptentFactoryNames(numProlongatorFactories); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundNsp = transferFactories->name(param).find("Nullspace"); - if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length()==10) { - int number = atoi(&(transferFactories->name(param).at(9))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Nullspace%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - coarseNspFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - size_t foundPtent = transferFactories->name(param).find("Ptent"); - if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length()==6) { - int number = atoi(&(transferFactories->name(param).at(5))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numPtentFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Ptent%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - ptentFactoryNames[number-1] = 
transferFactories->entry(param); - continue; - } - size_t foundP = transferFactories->name(param).find("P"); - if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length()==2) { - int number = atoi(&(transferFactories->name(param).at(1))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format P%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - prolongatorFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - } + } else { + RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - // register all prolongation factories in TogglePFactory - for (std::vector::const_iterator it = prolongatorFactoryNames.begin(); it != prolongatorFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddProlongatorFactory(p); - } + paramListNonConst->remove("TransferFactories"); - // register all tentative prolongation factories in TogglePFactory - for (std::vector::const_iterator it = ptentFactoryNames.begin(); it != ptentFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddPtentFactory(p); - } + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - // register all coarse nullspace factories in TogglePFactory - for (std::vector::const_iterator it = coarseNspFactoryNames.begin(); it != coarseNspFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddCoarseNullspaceFactory(p); - } + for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { + RCP p = BuildFactory(transferFactories->entry(param), factoryMapIn, factoryManagersIn); + 
factory->AddTransferFactory(p); } - return factory; } - RCP BuildToggleCoordinatesTransferFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist("TransferFactories") == false, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransferFactory: the ToggleCoordinatesTransferFactory needs a sublist 'TransferFactories' containing information about the subfactories for coordinate transfer!"); + return factory; + } - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); + template // T must implement the Factory interface + RCP BuildTogglePFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory; + if (paramList.isSublist("TransferFactories") == false) { + // TODO put in an error message: the TogglePFactory needs a TransferFactories sublist! + factory = Build2(paramList, factoryMapIn, factoryManagersIn); + + } else { + RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); + paramListNonConst->remove("TransferFactories"); - // build CoordinatesTransferFactory - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + // build TogglePFactory + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - // count how many coordinate transfer factories have been declared. + // count how many prolongation factories and how many coarse null space factories have been declared. // the numbers must match! 
- int numCoordTransferFactories = 0; + int numProlongatorFactories = 0; + int numPtentFactories = 0; + int numCoarseNspFactories = 0; for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundCoordinates = transferFactories->name(param).find("Coordinates"); - if (foundCoordinates != std::string::npos && foundCoordinates == 0 && transferFactories->name(param).length()==12) { - numCoordTransferFactories++; + size_t foundNsp = transferFactories->name(param).find("Nullspace"); + if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length() == 10) { + numCoarseNspFactories++; + continue; + } + size_t foundPtent = transferFactories->name(param).find("Ptent"); + if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length() == 6) { + numPtentFactories++; + continue; + } + size_t foundP = transferFactories->name(param).find("P"); + if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length() == 2) { + numProlongatorFactories++; continue; } } - TEUCHOS_TEST_FOR_EXCEPTION(numCoordTransferFactories != 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: The ToggleCoordinatesTransferFactory needs two (different) coordinate transfer factories. 
The factories have to be provided using the names Coordinates%i, where %i denotes a number between 1 and 9."); + TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories != numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of prolongator and coarse nullspace factories!"); + TEUCHOS_TEST_FOR_EXCEPTION(numPtentFactories != numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of ptent and coarse nullspace factories!"); + TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories < 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The TogglePFactory needs at least two different prolongation operators. The factories have to be provided using the names P%i and Nullspace %i, where %i denotes a number between 1 and 9."); // create empty vectors with data - std::vector coarseCoordsFactoryNames(numCoordTransferFactories); + std::vector prolongatorFactoryNames(numProlongatorFactories); + std::vector coarseNspFactoryNames(numProlongatorFactories); + std::vector ptentFactoryNames(numProlongatorFactories); for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundCoords = transferFactories->name(param).find("Coordinates"); - if (foundCoords != std::string::npos && foundCoords == 0 && transferFactories->name(param).length()==12) { - int number = atoi(&(transferFactories->name(param).at(11))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numCoordTransferFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: Please use the format Coordinates%i with %i an integer between 1 and the maximum number of coordinate transfer factories in ToggleCoordinatesTransferFactory!"); - coarseCoordsFactoryNames[number-1] = transferFactories->entry(param); - continue; + size_t foundNsp = transferFactories->name(param).find("Nullspace"); + if (foundNsp != 
std::string::npos && foundNsp == 0 && transferFactories->name(param).length() == 10) { + int number = atoi(&(transferFactories->name(param).at(9))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Nullspace%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); + coarseNspFactoryNames[number - 1] = transferFactories->entry(param); + continue; + } + size_t foundPtent = transferFactories->name(param).find("Ptent"); + if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length() == 6) { + int number = atoi(&(transferFactories->name(param).at(5))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numPtentFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Ptent%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); + ptentFactoryNames[number - 1] = transferFactories->entry(param); + continue; + } + size_t foundP = transferFactories->name(param).find("P"); + if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length() == 2) { + int number = atoi(&(transferFactories->name(param).at(1))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format P%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); + prolongatorFactoryNames[number - 1] = transferFactories->entry(param); + continue; } } - // register all coarse nullspace factories in TogglePFactory - for (std::vector::const_iterator it = coarseCoordsFactoryNames.begin(); it != coarseCoordsFactoryNames.end(); ++it) { + // register all prolongation factories in TogglePFactory + for (std::vector::const_iterator it = prolongatorFactoryNames.begin(); it != 
prolongatorFactoryNames.end(); ++it) { RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddCoordTransferFactory(p); + factory->AddProlongatorFactory(p); } - return factory; - } + // register all tentative prolongation factories in TogglePFactory + for (std::vector::const_iterator it = ptentFactoryNames.begin(); it != ptentFactoryNames.end(); ++it) { + RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddPtentFactory(p); + } - //! TrilinosSmoother - // Parameter List Parsing: - // - // - // - // - // - // ... - // - // - RCP BuildTrilinosSmoother(const Teuchos::ParameterList & paramList, const FactoryMap & factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new TrilinosSmoother()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "TrilinosSmoother", Exceptions::RuntimeError, ""); - - // Is it true? TEUCHOS_TEST_FOR_EXCEPTION(!paramList.isParameter("type"), Exceptions::RuntimeError, "TrilinosSmoother: parameter 'type' is mandatory"); - // type="" is default in TrilinosSmoother, but what happen then? - - std::string type=""; if(paramList.isParameter("type")) type = paramList.get("type"); - int overlap=0; if(paramList.isParameter("overlap")) overlap = paramList.get ("overlap"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - - // parameters from SmootherFactory - //bool bKeepSmootherData = false; if(paramList.isParameter("keep smoother data")) bKeepSmootherData = paramList.get("keep smoother data"); - - // Read in factory information for smoothers (if available...) 
- // NOTE: only a selected number of factories can be used with the Trilinos smoother - // smoothers usually work with the global data available (which is A and the transfers P and R) - - Teuchos::RCP trilSmoo = Teuchos::rcp(new TrilinosSmoother(type, params, overlap)); - - if (paramList.isParameter("LineDetection_Layers")) { - RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + // register all coarse nullspace factories in TogglePFactory + for (std::vector::const_iterator it = coarseNspFactoryNames.begin(); it != coarseNspFactoryNames.end(); ++it) { + RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddCoarseNullspaceFactory(p); } - if (paramList.isParameter("LineDetection_VertLineIds")) { - RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + return factory; + } + + RCP BuildToggleCoordinatesTransferFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory; + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist("TransferFactories") == false, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransferFactory: the ToggleCoordinatesTransferFactory needs a sublist 'TransferFactories' containing information about the subfactories for coordinate transfer!"); + + RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); + paramListNonConst->remove("TransferFactories"); + + // build CoordinatesTransferFactory + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + + // count how many coordinate transfer factories have been declared. + // the numbers must match! 
+ int numCoordTransferFactories = 0; + for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { + size_t foundCoordinates = transferFactories->name(param).find("Coordinates"); + if (foundCoordinates != std::string::npos && foundCoordinates == 0 && transferFactories->name(param).length() == 12) { + numCoordTransferFactories++; + continue; } - if (paramList.isParameter("CoarseNumZLayers")) { - RCP generatingFact = BuildFactory(paramList.getEntry("CoarseNumZLayers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("CoarseNumZLayers", generatingFact); + } + TEUCHOS_TEST_FOR_EXCEPTION(numCoordTransferFactories != 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: The ToggleCoordinatesTransferFactory needs two (different) coordinate transfer factories. The factories have to be provided using the names Coordinates%i, where %i denotes a number between 1 and 9."); + + // create empty vectors with data + std::vector coarseCoordsFactoryNames(numCoordTransferFactories); + + for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { + size_t foundCoords = transferFactories->name(param).find("Coordinates"); + if (foundCoords != std::string::npos && foundCoords == 0 && transferFactories->name(param).length() == 12) { + int number = atoi(&(transferFactories->name(param).at(11))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numCoordTransferFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: Please use the format Coordinates%i with %i an integer between 1 and the maximum number of coordinate transfer factories in ToggleCoordinatesTransferFactory!"); + coarseCoordsFactoryNames[number - 1] = transferFactories->entry(param); + continue; } - - RCP smooFact = rcp(new SmootherFactory(Teuchos::null)); - Teuchos::ParameterList smooFactParams; - //smooFactParams.setEntry("keep 
smoother data", paramList.getEntry("keep smoother data")); - smooFact->SetParameterList(smooFactParams); - smooFact->SetSmootherPrototypes(trilSmoo); - return smooFact; } -#ifdef HAVE_MUELU_MATLAB - //! MatlabSmoother - // Parameter List Parsing: - // - // - // - // - // - // - // - // - // - RCP BuildMatlabSmoother(const Teuchos::ParameterList & paramList, const FactoryMap & factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "MatlabSmoother", Exceptions::RuntimeError, ""); - - // Read in factory information for smoothers (if available...) - // NOTE: only a selected number of factories can be used with the Trilinos smoother - // smoothers usually work with the global data available (which is A and the transfers P and R) - - Teuchos::RCP matSmoo = Teuchos::rcp(new MatlabSmoother(paramList)); - - return rcp(new SmootherFactory(matSmoo)); + // register all coarse nullspace factories in TogglePFactory + for (std::vector::const_iterator it = coarseCoordsFactoryNames.begin(); it != coarseCoordsFactoryNames.end(); ++it) { + RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddCoordTransferFactory(p); } -#endif - - RCP BuildDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null)); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "DirectSolver", Exceptions::RuntimeError, ""); - std::string type; if(paramList.isParameter("type")) type = paramList.get("type"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = 
paramList.get("ParameterList"); - - return rcp(new SmootherFactory(rcp(new DirectSolver(type, params)), Teuchos::null)); + return factory; + } + + //! TrilinosSmoother + // Parameter List Parsing: + // + // + // + // + // + // ... + // + // + RCP BuildTrilinosSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new TrilinosSmoother()))); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "TrilinosSmoother", Exceptions::RuntimeError, ""); + + // Is it true? TEUCHOS_TEST_FOR_EXCEPTION(!paramList.isParameter("type"), Exceptions::RuntimeError, "TrilinosSmoother: parameter 'type' is mandatory"); + // type="" is default in TrilinosSmoother, but what happen then? + + std::string type = ""; + if (paramList.isParameter("type")) type = paramList.get("type"); + int overlap = 0; + if (paramList.isParameter("overlap")) overlap = paramList.get("overlap"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); + + // parameters from SmootherFactory + // bool bKeepSmootherData = false; if(paramList.isParameter("keep smoother data")) bKeepSmootherData = paramList.get("keep smoother data"); + + // Read in factory information for smoothers (if available...) 
+ // NOTE: only a selected number of factories can be used with the Trilinos smoother + // smoothers usually work with the global data available (which is A and the transfers P and R) + + Teuchos::RCP trilSmoo = Teuchos::rcp(new TrilinosSmoother(type, params, overlap)); + + if (paramList.isParameter("LineDetection_Layers")) { + RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); + trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + if (paramList.isParameter("LineDetection_VertLineIds")) { + RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); + trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + if (paramList.isParameter("CoarseNumZLayers")) { + RCP generatingFact = BuildFactory(paramList.getEntry("CoarseNumZLayers"), factoryMapIn, factoryManagersIn); + trilSmoo->SetFactory("CoarseNumZLayers", generatingFact); } - template // T must implement the Factory interface - RCP BuildBlockedSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facManagers; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - RCP M = Teuchos::null; - - if (b->isParameter("group")) { - // use a factory manager - std::string facManagerName = b->get< std::string >("group"); - TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not 
been found. Please check the spelling of the factory managers in your xml file."); - RCP Mb = factoryManagersIn.find(facManagerName)->second; - M = Teuchos::rcp_dynamic_cast(Mb); - TEUCHOS_TEST_FOR_EXCEPTION(M==Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); - } else { - // read in the list of factories - M = rcp(new FactoryManager()); - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - M->SetFactory(b->name(param),p); - } - } + RCP smooFact = rcp(new SmootherFactory(Teuchos::null)); + Teuchos::ParameterList smooFactParams; + // smooFactParams.setEntry("keep smoother data", paramList.getEntry("keep smoother data")); + smooFact->SetParameterList(smooFactParams); + smooFact->SetSmootherPrototypes(trilSmoo); + return smooFact; + } - // add factory manager to internal vector of factory managers - M->SetIgnoreUserData(true); - facManagers.push_back(M); - paramListNonConst->remove(ss.str()); - blockid++; +#ifdef HAVE_MUELU_MATLAB + //! MatlabSmoother + // Parameter List Parsing: + // + // + // + // + // + // + // + // + // + RCP BuildMatlabSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "MatlabSmoother", Exceptions::RuntimeError, ""); + + // Read in factory information for smoothers (if available...) 
+ // NOTE: only a selected number of factories can be used with the Trilinos smoother + // smoothers usually work with the global data available (which is A and the transfers P and R) + + Teuchos::RCP matSmoo = Teuchos::rcp(new MatlabSmoother(paramList)); + + return rcp(new SmootherFactory(matSmoo)); + } +#endif + + RCP BuildDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null)); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "DirectSolver", Exceptions::RuntimeError, ""); + + std::string type; + if (paramList.isParameter("type")) type = paramList.get("type"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); + + return rcp(new SmootherFactory(rcp(new DirectSolver(type, params)), Teuchos::null)); + } + + template // T must implement the Factory interface + RCP BuildBlockedSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + + // internal vector of factory managers + std::vector > facManagers; + + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in here + RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); + + RCP M = Teuchos::null; + + if (b->isParameter("group")) { + // use a factory manager + std::string facManagerName = b->get("group"); + 
TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. Please check the spelling of the factory managers in your xml file."); + RCP Mb = factoryManagersIn.find(facManagerName)->second; + M = Teuchos::rcp_dynamic_cast(Mb); + TEUCHOS_TEST_FOR_EXCEPTION(M == Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); } else { - blockExists = false; - break; + // read in the list of factories + M = rcp(new FactoryManager()); + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { + RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + M->SetFactory(b->name(param), p); + } } + // add factory manager to internal vector of factory managers + M->SetIgnoreUserData(true); + facManagers.push_back(M); + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // create a new blocked smoother - RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // important: set block factory for A here! - // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. - // The user might want to overwrite this in the xml file, so just - // use what is declared as "A" - //bs->SetFactory("A", MueLu::NoFactory::getRCP()); + // create a new blocked smoother + RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - for (int i = 0; i(facManagers.size()); i++) { - bs->AddFactoryManager(facManagers[i],i); - } + // important: set block factory for A here! + // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. 
+ // The user might want to overwrite this in the xml file, so just + // use what is declared as "A" + // bs->SetFactory("A", MueLu::NoFactory::getRCP()); - return rcp(new SmootherFactory(bs)); + for (int i = 0; i < Teuchos::as(facManagers.size()); i++) { + bs->AddFactoryManager(facManagers[i], i); } + return rcp(new SmootherFactory(bs)); + } + #ifdef HAVE_MUELU_TEKO - RCP BuildTekoSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - RCP tekoParams = rcp(new ParameterList(paramListNonConst->sublist("Inverse Factory Library"))); - paramListNonConst->remove("Inverse Factory Library"); + RCP BuildTekoSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + RCP tekoParams = rcp(new ParameterList(paramListNonConst->sublist("Inverse Factory Library"))); + paramListNonConst->remove("Inverse Factory Library"); + + // create a new blocked smoother + RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + + // important: set block factory for A here! + // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. 
+ // The user might want to overwrite this in the xml file, so just + // use what is declared as "A" + // bs->SetFactory("A", MueLu::NoFactory::getRCP()); + + // Set Teko parameters ("Inverse Factory Library") + bs->SetTekoParameters(tekoParams); + + return rcp(new SmootherFactory(bs)); + } +#endif - // create a new blocked smoother - RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + RCP BuildBlockedDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { + if (paramList.numParams() == 0) + return rcp(new SmootherFactory(rcp(new BlockedDirectSolver()))); - // important: set block factory for A here! - // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. - // The user might want to overwrite this in the xml file, so just - // use what is declared as "A" - //bs->SetFactory("A", MueLu::NoFactory::getRCP()); + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "BlockedDirectSolver", Exceptions::RuntimeError, "FactoryFactory::BuildBlockedDirectSolver: Generating factory needs to be a BlockedDirectSolver."); - // Set Teko parameters ("Inverse Factory Library") - bs->SetTekoParameters(tekoParams); + std::string type; + if (paramList.isParameter("type")) type = paramList.get("type"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - return rcp(new SmootherFactory(bs)); - } -#endif + return rcp(new SmootherFactory(rcp(new BlockedDirectSolver(type, params)))); + } - RCP BuildBlockedDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { - if (paramList.numParams() == 0) - return rcp(new SmootherFactory(rcp(new BlockedDirectSolver()))); + // RCP BuildBlockedPFactory(const 
Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // RCP pfac = rcp(new BlockedPFactory()); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "BlockedDirectSolver", Exceptions::RuntimeError, "FactoryFactory::BuildBlockedDirectSolver: Generating factory needs to be a BlockedDirectSolver."); + template // T must implement the Factory interface + RCP BuildBlockedFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP pfac = Teuchos::null; - std::string type; if(paramList.isParameter("type")) type = paramList.get("type"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); - return rcp(new SmootherFactory(rcp(new BlockedDirectSolver(type, params)))); - } + // internal vector of factory managers + std::vector > facManagers; - //RCP BuildBlockedPFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // RCP pfac = rcp(new BlockedPFactory()); - - template // T must implement the Factory interface - RCP BuildBlockedFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP pfac = Teuchos::null; - - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facManagers; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter 
group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - RCP M = Teuchos::null; - - if (b->isParameter("group")) { - // use a factory manager - std::string facManagerName = b->get< std::string >("group"); - TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. Please check the spelling of the factory managers in your xml file."); - RCP Mb = factoryManagersIn.find(facManagerName)->second; - M = Teuchos::rcp_dynamic_cast(Mb); - TEUCHOS_TEST_FOR_EXCEPTION(M==Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); - } else { - // read in the list of factories - M = rcp(new FactoryManager()); - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - M->SetFactory(b->name(param),p); - } - } + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in here + RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - // add factory manager to internal vector of factory managers - M->SetIgnoreUserData(true); - facManagers.push_back(M); - paramListNonConst->remove(ss.str()); - blockid++; + RCP M = Teuchos::null; + + if (b->isParameter("group")) { + // use a factory manager + std::string facManagerName = b->get("group"); + TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. 
Please check the spelling of the factory managers in your xml file."); + RCP Mb = factoryManagersIn.find(facManagerName)->second; + M = Teuchos::rcp_dynamic_cast(Mb); + TEUCHOS_TEST_FOR_EXCEPTION(M == Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); } else { - blockExists = false; - break; + // read in the list of factories + M = rcp(new FactoryManager()); + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { + RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + M->SetFactory(b->name(param), p); + } } + // add factory manager to internal vector of factory managers + M->SetIgnoreUserData(true); + facManagers.push_back(M); + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // build BlockedPFactory (without sub block information) - pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // add FactoryManager objects - for(size_t i = 0; iAddFactoryManager(facManagers[i]); // add factory manager - } + // build BlockedPFactory (without sub block information) + pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - return pfac; + // add FactoryManager objects + for (size_t i = 0; i < facManagers.size(); i++) { + pfac->AddFactoryManager(facManagers[i]); // add factory manager } + return pfac; + } - template // T must implement the Factory interface - RCP BuildBlockedCoordFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP pfac = Teuchos::null; - - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); + template // T must implement the Factory interface + RCP BuildBlockedCoordFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP pfac = Teuchos::null; - // internal vector of factory 
managers - std::vector > facBase; + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; + // internal vector of factory managers + std::vector > facBase; - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; - // read in the list of factories - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - facBase.push_back(p); - } + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in here + RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - // add factory manager to internal vector of factory managers - paramListNonConst->remove(ss.str()); - blockid++; - } else { - blockExists = false; - break; + // read in the list of factories + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { + RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + facBase.push_back(p); } + // add factory manager to internal vector of factory managers + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // build BlockedPFactory (without sub block information) - pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // add FactoryManager objects - for(size_t i = 0; iAddFactory(facBase[i]); // add factory manager - } + // build BlockedPFactory (without sub block 
information) + pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - return pfac; + // add FactoryManager objects + for (size_t i = 0; i < facBase.size(); i++) { + pfac->AddFactory(facBase[i]); // add factory manager } - }; // class -} // namespace MueLu + return pfac; + } + +}; // class +} // namespace MueLu #define MUELU_FACTORYFACTORY_SHORT -#endif // MUELU_FACTORYFACTORY_DECL_HPP +#endif // MUELU_FACTORYFACTORY_DECL_HPP - // TODO: handle factory parameters - // TODO: parameter validator - // TODO: static - // TODO: default parameters should not be duplicated here and on the Factory (ex: default for overlap (=0) is defined both here and on TrilinosSmoother constructors) +// TODO: handle factory parameters +// TODO: parameter validator +// TODO: static +// TODO: default parameters should not be duplicated here and on the Factory (ex: default for overlap (=0) is defined both here and on TrilinosSmoother constructors) diff --git a/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp b/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp index 03e2596daf12..a82cad42cc47 100644 --- a/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp +++ b/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp @@ -55,42 +55,42 @@ namespace MueLu { - //! - template - class HierarchyFactory : public BaseClass { +//! +template +class HierarchyFactory : public BaseClass { #undef MUELU_HIERARCHYFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //@{ Constructors/Destructors. + public: + //@{ Constructors/Destructors. - //! Destructor. - virtual ~HierarchyFactory() { } + //! Destructor. + virtual ~HierarchyFactory() {} - //@} + //@} - //! Create an empty Hierarchy object - // Note: This function is not very useful at the moment as MueLu only have on Hierarchy class. - // In the future, we might have an abstract Hierarchy class and several derived Hierarchy classes. - // Using this function will then be the recommended way to generate a Hierarchy. 
- // - // This method is called Create() instead of Build(), because it return an non-initialized - // object (ie: MG setup is not done). - // Build() function in MueLu returns initialized objects. - virtual RCP CreateHierarchy() const = 0; + //! Create an empty Hierarchy object + // Note: This function is not very useful at the moment as MueLu only have on Hierarchy class. + // In the future, we might have an abstract Hierarchy class and several derived Hierarchy classes. + // Using this function will then be the recommended way to generate a Hierarchy. + // + // This method is called Create() instead of Build(), because it return an non-initialized + // object (ie: MG setup is not done). + // Build() function in MueLu returns initialized objects. + virtual RCP CreateHierarchy() const = 0; - //! Create a labeled empty Hierarchy object - virtual RCP CreateHierarchy(const std::string& label) const = 0; + //! Create a labeled empty Hierarchy object + virtual RCP CreateHierarchy(const std::string& label) const = 0; - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const = 0; + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const = 0; - }; // class HierarchyFactoryBase +}; // class HierarchyFactoryBase -} // namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHYFACTORY_SHORT -#endif //ifndef MUELU_HIERARCHYFACTORY_HPP +#endif // ifndef MUELU_HIERARCHYFACTORY_HPP diff --git a/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp b/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp index 04f89f47e6ae..e38e8cc1c221 100644 --- a/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp +++ b/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp @@ -70,471 +70,456 @@ namespace MueLu { - // This class stores the configuration of a Hierarchy. - // The class also provides an algorithm to build a Hierarchy from the configuration. 
- // - // See also: FactoryManager - // - template - class HierarchyManager : public HierarchyFactory { +// This class stores the configuration of a Hierarchy. +// The class also provides an algorithm to build a Hierarchy from the configuration. +// +// See also: FactoryManager +// +template +class HierarchyManager : public HierarchyFactory { #undef MUELU_HIERARCHYMANAGER_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::pair keep_pair; - - public: - - //! Constructor - HierarchyManager(int numDesiredLevel = MasterList::getDefault("max levels")) : - numDesiredLevel_(numDesiredLevel), - maxCoarseSize_(MasterList::getDefault("coarse: max size")), - verbosity_(Medium), - doPRrebalance_(MasterList::getDefault("repartition: rebalance P and R")), - doPRViaCopyrebalance_(MasterList::getDefault("repartition: explicit via new copy rebalance P and R")), - implicitTranspose_(MasterList::getDefault("transpose: use implicit")), - fuseProlongationAndUpdate_(MasterList::getDefault("fuse prolongation and update")), - suppressNullspaceDimensionCheck_(MasterList::getDefault("nullspace: suppress dimension check")), - sizeOfMultiVectors_(MasterList::getDefault("number of vectors")), - graphOutputLevel_(-2) { } - - //! Destructor - virtual ~HierarchyManager() = default; - - //! - void AddFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { - const int lastLevel = startLevel + numDesiredLevel - 1; - if (levelManagers_.size() < lastLevel + 1) - levelManagers_.resize(lastLevel + 1); - - for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) - levelManagers_[iLevel] = manager; - } - - //! - RCP GetFactoryManager(int levelID) const { - // NOTE: last levelManager is used for all the remaining levels - return (levelID >= levelManagers_.size() ? levelManagers_[levelManagers_.size()-1] : levelManagers_[levelID]); - } - - //! returns number of factory managers stored in levelManagers_ vector. 
- size_t getNumFactoryManagers() const { - return levelManagers_.size(); - } - - //! - void CheckConfig() { - for (int i = 0; i < levelManagers_.size(); i++) - TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_[i] == Teuchos::null, Exceptions::RuntimeError, "MueLu:HierarchyConfig::CheckConfig(): Undefined configuration for level:"); - } - - //@{ - - virtual RCP CreateHierarchy() const { - return rcp(new Hierarchy()); - } - - virtual RCP CreateHierarchy(const std::string& label) const { - return rcp(new Hierarchy(label)); - } - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy& H) const { - TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); - - RCP l0 = H.GetLevel(0); - RCP Op = l0->Get>("A"); - - // Compare nullspace dimension to NumPDEs and throw/warn based on user input - if (l0->IsAvailable("Nullspace")) { - RCP A = Teuchos::rcp_dynamic_cast(Op); - if (A != Teuchos::null) { - RCP nullspace = l0->Get>("Nullspace"); - - if (static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors()) - { - std::stringstream msg; - msg << "User-provided nullspace has fewer vectors (" - << nullspace->getNumVectors() << ") than number of PDE equations (" - << A->GetFixedBlockSize() << "). "; - - if (suppressNullspaceDimensionCheck_) - { - msg << "It depends on the PDE, if this is a problem or not."; - this->GetOStream(Warnings0) << msg.str() << std::endl; - } - else - { - msg << "Add the missing nullspace vectors! (You can suppress this check. See the MueLu user guide for details.)"; - TEUCHOS_TEST_FOR_EXCEPTION(static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors(), Exceptions::RuntimeError, msg.str()); - } + typedef std::pair keep_pair; + + public: + //! 
Constructor + HierarchyManager(int numDesiredLevel = MasterList::getDefault("max levels")) + : numDesiredLevel_(numDesiredLevel) + , maxCoarseSize_(MasterList::getDefault("coarse: max size")) + , verbosity_(Medium) + , doPRrebalance_(MasterList::getDefault("repartition: rebalance P and R")) + , doPRViaCopyrebalance_(MasterList::getDefault("repartition: explicit via new copy rebalance P and R")) + , implicitTranspose_(MasterList::getDefault("transpose: use implicit")) + , fuseProlongationAndUpdate_(MasterList::getDefault("fuse prolongation and update")) + , suppressNullspaceDimensionCheck_(MasterList::getDefault("nullspace: suppress dimension check")) + , sizeOfMultiVectors_(MasterList::getDefault("number of vectors")) + , graphOutputLevel_(-2) {} + + //! Destructor + virtual ~HierarchyManager() = default; + + //! + void AddFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { + const int lastLevel = startLevel + numDesiredLevel - 1; + if (levelManagers_.size() < lastLevel + 1) + levelManagers_.resize(lastLevel + 1); + + for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) + levelManagers_[iLevel] = manager; + } + + //! + RCP GetFactoryManager(int levelID) const { + // NOTE: last levelManager is used for all the remaining levels + return (levelID >= levelManagers_.size() ? levelManagers_[levelManagers_.size() - 1] : levelManagers_[levelID]); + } + + //! returns number of factory managers stored in levelManagers_ vector. + size_t getNumFactoryManagers() const { + return levelManagers_.size(); + } + + //! + void CheckConfig() { + for (int i = 0; i < levelManagers_.size(); i++) + TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_[i] == Teuchos::null, Exceptions::RuntimeError, "MueLu:HierarchyConfig::CheckConfig(): Undefined configuration for level:"); + } + + //@{ + + virtual RCP CreateHierarchy() const { + return rcp(new Hierarchy()); + } + + virtual RCP CreateHierarchy(const std::string& label) const { + return rcp(new Hierarchy(label)); + } + + //! 
Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const { + TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); + + RCP l0 = H.GetLevel(0); + RCP Op = l0->Get>("A"); + + // Compare nullspace dimension to NumPDEs and throw/warn based on user input + if (l0->IsAvailable("Nullspace")) { + RCP A = Teuchos::rcp_dynamic_cast(Op); + if (A != Teuchos::null) { + RCP nullspace = l0->Get>("Nullspace"); + + if (static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors()) { + std::stringstream msg; + msg << "User-provided nullspace has fewer vectors (" + << nullspace->getNumVectors() << ") than number of PDE equations (" + << A->GetFixedBlockSize() << "). "; + + if (suppressNullspaceDimensionCheck_) { + msg << "It depends on the PDE, if this is a problem or not."; + this->GetOStream(Warnings0) << msg.str() << std::endl; + } else { + msg << "Add the missing nullspace vectors! (You can suppress this check. See the MueLu user guide for details.)"; + TEUCHOS_TEST_FOR_EXCEPTION(static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors(), Exceptions::RuntimeError, msg.str()); } - } else { - this->GetOStream(Warnings0) << "Skipping dimension check of user-supplied nullspace because user-supplied operator is not a matrix" << std::endl; } + } else { + this->GetOStream(Warnings0) << "Skipping dimension check of user-supplied nullspace because user-supplied operator is not a matrix" << std::endl; } + } #ifdef HAVE_MUELU_DEBUG - // Reset factories' data used for debugging - for (int i = 0; i < levelManagers_.size(); i++) - levelManagers_[i]->ResetDebugData(); + // Reset factories' data used for debugging + for (int i = 0; i < levelManagers_.size(); i++) + levelManagers_[i]->ResetDebugData(); #endif - // Setup Matrix - // TODO: I should certainly undo this somewhere... + // Setup Matrix + // TODO: I should certainly undo this somewhere... 
- Xpetra::UnderlyingLib lib = Op->getDomainMap()->lib(); - H.setlib(lib); + Xpetra::UnderlyingLib lib = Op->getDomainMap()->lib(); + H.setlib(lib); - SetupOperator(*Op); - SetupExtra(H); + SetupOperator(*Op); + SetupExtra(H); - // Setup Hierarchy - H.SetMaxCoarseSize(maxCoarseSize_); - VerboseObject::SetDefaultVerbLevel(verbosity_); - if (graphOutputLevel_ >= 0 || graphOutputLevel_ == -1) - H.EnableGraphDumping("dep_graph", graphOutputLevel_); + // Setup Hierarchy + H.SetMaxCoarseSize(maxCoarseSize_); + VerboseObject::SetDefaultVerbLevel(verbosity_); + if (graphOutputLevel_ >= 0 || graphOutputLevel_ == -1) + H.EnableGraphDumping("dep_graph", graphOutputLevel_); - if (VerboseObject::IsPrint(Statistics2)) { - RCP Amat = rcp_dynamic_cast(Op); + if (VerboseObject::IsPrint(Statistics2)) { + RCP Amat = rcp_dynamic_cast(Op); - if (!Amat.is_null()) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); + if (!Amat.is_null()) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); - VerboseObject::GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); - } else { - VerboseObject::GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; - } + VerboseObject::GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); + } else { + VerboseObject::GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; } + } - H.SetPRrebalance(doPRrebalance_); - H.SetPRViaCopyrebalance(doPRViaCopyrebalance_); - H.SetImplicitTranspose(implicitTranspose_); - H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_); - - H.Clear(); - - // There are few issues with using Keep in the interpreter: - // 1. Hierarchy::Keep interface takes a name and a factory. 
If - // factories are different on different levels, the AddNewLevel() call - // in Hierarchy does not work properly, as it assume that factories are - // the same. - // 2. FactoryManager does not have a Keep option, only Hierarchy and - // Level have it - // 3. Interpreter constructs factory managers, but not levels. So we - // cannot set up Keep flags there. - // - // The solution implemented here does the following: - // 1. Construct hierarchy with dummy levels. This avoids - // Hierarchy::AddNewLevel() calls which will propagate wrong - // inheritance. - // 2. Interpreter constructs keep_ array with names and factories for - // that level - // 3. For each level, we call Keep(name, factory) for each keep_ - for (int i = 0; i < numDesiredLevel_; i++) { - std::map >::const_iterator it = keep_.find(i); - if (it != keep_.end()) { - RCP l = H.GetLevel(i); - const std::vector& keeps = it->second; - for (size_t j = 0; j < keeps.size(); j++) - l->Keep(keeps[j].first, keeps[j].second); - } - if (i < numDesiredLevel_-1) { - RCP newLevel = rcp(new Level()); - H.AddLevel(newLevel); - } + H.SetPRrebalance(doPRrebalance_); + H.SetPRViaCopyrebalance(doPRViaCopyrebalance_); + H.SetImplicitTranspose(implicitTranspose_); + H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_); + + H.Clear(); + + // There are few issues with using Keep in the interpreter: + // 1. Hierarchy::Keep interface takes a name and a factory. If + // factories are different on different levels, the AddNewLevel() call + // in Hierarchy does not work properly, as it assume that factories are + // the same. + // 2. FactoryManager does not have a Keep option, only Hierarchy and + // Level have it + // 3. Interpreter constructs factory managers, but not levels. So we + // cannot set up Keep flags there. + // + // The solution implemented here does the following: + // 1. Construct hierarchy with dummy levels. This avoids + // Hierarchy::AddNewLevel() calls which will propagate wrong + // inheritance. + // 2. 
Interpreter constructs keep_ array with names and factories for + // that level + // 3. For each level, we call Keep(name, factory) for each keep_ + for (int i = 0; i < numDesiredLevel_; i++) { + std::map>::const_iterator it = keep_.find(i); + if (it != keep_.end()) { + RCP l = H.GetLevel(i); + const std::vector& keeps = it->second; + for (size_t j = 0; j < keeps.size(); j++) + l->Keep(keeps[j].first, keeps[j].second); } + if (i < numDesiredLevel_ - 1) { + RCP newLevel = rcp(new Level()); + H.AddLevel(newLevel); + } + } - // Matrices to print - for(auto iter=matricesToPrint_.begin(); iter!=matricesToPrint_.end(); iter++) - ExportDataSetKeepFlags(H,iter->second,iter->first); + // Matrices to print + for (auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) + ExportDataSetKeepFlags(H, iter->second, iter->first); - // Vectors, aggregates and other things that need special case handling - ExportDataSetKeepFlags(H, nullspaceToPrint_, "Nullspace"); - ExportDataSetKeepFlags(H, coordinatesToPrint_, "Coordinates"); - // NOTE: Aggregates use the next level's Factory - ExportDataSetKeepFlagsNextLevel(H, aggregatesToPrint_, "Aggregates"); + // Vectors, aggregates and other things that need special case handling + ExportDataSetKeepFlags(H, nullspaceToPrint_, "Nullspace"); + ExportDataSetKeepFlags(H, coordinatesToPrint_, "Coordinates"); + // NOTE: Aggregates use the next level's Factory + ExportDataSetKeepFlagsNextLevel(H, aggregatesToPrint_, "Aggregates"); #ifdef HAVE_MUELU_INTREPID2 - ExportDataSetKeepFlags(H,elementToNodeMapsToPrint_, "pcoarsen: element to node map"); + ExportDataSetKeepFlags(H, elementToNodeMapsToPrint_, "pcoarsen: element to node map"); #endif - // Data to save only (these do not have a level, so we do all levels) - for(int i=0; iprint(H.GetOStream(Developer), verbosity_); - - isLastLevel = r || (levelID == lastLevelID); - levelID++; - } - if (!matvecParams_.is_null()) - H.SetMatvecParams(matvecParams_); - 
H.AllocateLevelMultiVectors(sizeOfMultiVectors_); - // Set hierarchy description. - // This is cached, but involves and MPI_Allreduce. - H.description(); - H.describe(H.GetOStream(Runtime0), verbosity_); - - // When we reuse hierarchy, it is necessary that we don't - // change the number of levels. We also cannot make requests - // for coarser levels, because we don't construct all the - // data on previous levels. For instance, let's say our first - // run constructed three levels. If we try to do requests during - // next setup for the fourth level, it would need Aggregates - // which we didn't construct for level 3 because we reused P. - // To fix this situation, we change the number of desired levels - // here. - numDesiredLevel_ = levelID; - - // Matrix prints - for(auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) { - WriteData(H,iter->second,iter->first); - } + int levelID = 0; + int lastLevelID = numDesiredLevel_ - 1; + bool isLastLevel = false; - // Vectors, aggregates and all things we need to print manually - WriteData(H, nullspaceToPrint_, "Nullspace"); - WriteData(H, coordinatesToPrint_, "Coordinates"); - WriteDataAggregates(H, aggregatesToPrint_, "Aggregates"); + while (!isLastLevel) { + bool r = H.Setup(levelID, + LvlMngr(levelID - 1, lastLevelID), + LvlMngr(levelID, lastLevelID), + LvlMngr(levelID + 1, lastLevelID)); + if (levelID < H.GetNumLevels()) + H.GetLevel(levelID)->print(H.GetOStream(Developer), verbosity_); + isLastLevel = r || (levelID == lastLevelID); + levelID++; + } + if (!matvecParams_.is_null()) + H.SetMatvecParams(matvecParams_); + H.AllocateLevelMultiVectors(sizeOfMultiVectors_); + // Set hierarchy description. + // This is cached, but involves and MPI_Allreduce. + H.description(); + H.describe(H.GetOStream(Runtime0), verbosity_); + + // When we reuse hierarchy, it is necessary that we don't + // change the number of levels. 
We also cannot make requests + // for coarser levels, because we don't construct all the + // data on previous levels. For instance, let's say our first + // run constructed three levels. If we try to do requests during + // next setup for the fourth level, it would need Aggregates + // which we didn't construct for level 3 because we reused P. + // To fix this situation, we change the number of desired levels + // here. + numDesiredLevel_ = levelID; + + // Matrix prints + for (auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) { + WriteData(H, iter->second, iter->first); + } + // Vectors, aggregates and all things we need to print manually + WriteData(H, nullspaceToPrint_, "Nullspace"); + WriteData(H, coordinatesToPrint_, "Coordinates"); + WriteDataAggregates(H, aggregatesToPrint_, "Aggregates"); #ifdef HAVE_MUELU_INTREPID2 - typedef Kokkos::DynRankView FCi; - WriteDataFC(H,elementToNodeMapsToPrint_, "pcoarsen: element to node map","el2node"); + typedef Kokkos::DynRankView FCi; + WriteDataFC(H, elementToNodeMapsToPrint_, "pcoarsen: element to node map", "el2node"); #endif + } // SetupHierarchy - } //SetupHierarchy - - //@} - - typedef std::map > FactoryMap; + //@} - protected: //TODO: access function + typedef std::map> FactoryMap; - //! Setup Matrix object - virtual void SetupOperator(Operator& /* Op */) const { } + protected: // TODO: access function + //! Setup Matrix object + virtual void SetupOperator(Operator& /* Op */) const {} - //! Setup extra data - // TODO: merge with SetupMatrix ? - virtual void SetupExtra(Hierarchy& /* H */) const { } + //! Setup extra data + // TODO: merge with SetupMatrix ? 
+ virtual void SetupExtra(Hierarchy& /* H */) const {} - // TODO this was private - // Used in SetupHierarchy() to access levelManagers_ - // Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() - Teuchos::RCP LvlMngr(int levelID, int lastLevelID) const { - // NOTE: the order of 'if' statements is important - if (levelID == -1) // levelID = -1 corresponds to the finest level - return Teuchos::null; + // TODO this was private + // Used in SetupHierarchy() to access levelManagers_ + // Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() + Teuchos::RCP LvlMngr(int levelID, int lastLevelID) const { + // NOTE: the order of 'if' statements is important + if (levelID == -1) // levelID = -1 corresponds to the finest level + return Teuchos::null; - if (levelID == lastLevelID+1) // levelID = 'lastLevelID+1' corresponds to the last level (i.e., no nextLevel) - return Teuchos::null; + if (levelID == lastLevelID + 1) // levelID = 'lastLevelID+1' corresponds to the last level (i.e., no nextLevel) + return Teuchos::null; - if (levelManagers_.size() == 0) { // default factory manager. - // The default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager - static RCP defaultMngr = rcp(new FactoryManager()); - return defaultMngr; - } - - return GetFactoryManager(levelID); + if (levelManagers_.size() == 0) { // default factory manager. + // The default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager + static RCP defaultMngr = rcp(new FactoryManager()); + return defaultMngr; } - //! @group Hierarchy parameters - //! @{ + return GetFactoryManager(levelID); + } - mutable int numDesiredLevel_; - Xpetra::global_size_t maxCoarseSize_; - MsgType verbosity_; + //! @group Hierarchy parameters + //! 
@{ - bool doPRrebalance_; - bool doPRViaCopyrebalance_; - bool implicitTranspose_; - bool fuseProlongationAndUpdate_; + mutable int numDesiredLevel_; + Xpetra::global_size_t maxCoarseSize_; + MsgType verbosity_; - /*! @brief Flag to indicate whether the check of the nullspace dimension is suppressed + bool doPRrebalance_; + bool doPRViaCopyrebalance_; + bool implicitTranspose_; + bool fuseProlongationAndUpdate_; - By default, we do not suppress such a check, as it acts as a safety mechanism. - Yet, certain scenarios deliberately use nullspaces with less nullspace vectors than NumPDEs. - Therefore, the user can suppress this check. Then, the error message is converted to a warning. - */ - bool suppressNullspaceDimensionCheck_; + /*! @brief Flag to indicate whether the check of the nullspace dimension is suppressed - int sizeOfMultiVectors_; + By default, we do not suppress such a check, as it acts as a safety mechanism. + Yet, certain scenarios deliberately use nullspaces with less nullspace vectors than NumPDEs. + Therefore, the user can suppress this check. Then, the error message is converted to a warning. + */ + bool suppressNullspaceDimensionCheck_; - //! -2 = no output, -1 = all levels - int graphOutputLevel_; + int sizeOfMultiVectors_; - //! Lists of entities to be exported (or saved) - // Items here get handled manually - Teuchos::Array nullspaceToPrint_; - Teuchos::Array coordinatesToPrint_; - Teuchos::Array aggregatesToPrint_; - Teuchos::Array elementToNodeMapsToPrint_; + //! -2 = no output, -1 = all levels + int graphOutputLevel_; - // Data we'll need to save, not necessarily print - Teuchos::Array dataToSave_; + //! 
Lists of entities to be exported (or saved) + // Items here get handled manually + Teuchos::Array nullspaceToPrint_; + Teuchos::Array coordinatesToPrint_; + Teuchos::Array aggregatesToPrint_; + Teuchos::Array elementToNodeMapsToPrint_; - // Matrices we'll need to print - std::map > matricesToPrint_; + // Data we'll need to save, not necessarily print + Teuchos::Array dataToSave_; - Teuchos::RCP matvecParams_; + // Matrices we'll need to print + std::map> matricesToPrint_; - std::map > keep_; - //! @} + Teuchos::RCP matvecParams_; - private: - // Set the keep flags for Export Data - void ExportDataSetKeepFlags(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if(!L.is_null() && data[i] < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[data[i]]->GetFactory(name)); - } - } - } + std::map> keep_; + //! @} - void ExportDataSetKeepFlagsNextLevel(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if(!L.is_null() && data[i]+1 < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[data[i]+1]->GetFactory(name)); - } + private: + // Set the keep flags for Export Data + void ExportDataSetKeepFlags(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (!L.is_null() && data[i] < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[data[i]]->GetFactory(name)); } } - - // Set the keep flags for Export Data - void ExportDataSetKeepFlagsAll(Hierarchy& H, const std::string& name) const { - for (int i=0; i < H.GetNumLevels(); i++ ) { - RCP L = H.GetLevel(i); - if(!L.is_null() && i < levelManagers_.size()) - L->AddKeepFlag(name, 
&*levelManagers_[i]->GetFactory(name)); + } + + void ExportDataSetKeepFlagsNextLevel(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (!L.is_null() && data[i] + 1 < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[data[i] + 1]->GetFactory(name)); } } - - - template - void WriteData(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - std::string fileName; - if (H.getObjectLabel() != "") - fileName = H.getObjectLabel() + "_" + name + "_" + Teuchos::toString(data[i]) + ".m"; - else - fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if (data[i] < levelManagers_.size() && L->IsAvailable(name,&*levelManagers_[data[i]]->GetFactory(name))) { - // Try generating factory - RCP M = L->template Get< RCP >(name,&*levelManagers_[data[i]]->GetFactory(name)); - if (!M.is_null()) { - Xpetra::IO::Write(fileName,* M); - } + } + + // Set the keep flags for Export Data + void ExportDataSetKeepFlagsAll(Hierarchy& H, const std::string& name) const { + for (int i = 0; i < H.GetNumLevels(); i++) { + RCP L = H.GetLevel(i); + if (!L.is_null() && i < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[i]->GetFactory(name)); + } + } + + template + void WriteData(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + std::string fileName; + if (H.getObjectLabel() != "") + fileName = H.getObjectLabel() + "_" + name + "_" + Teuchos::toString(data[i]) + ".m"; + else + fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (data[i] < levelManagers_.size() && L->IsAvailable(name, &*levelManagers_[data[i]]->GetFactory(name))) { + // Try generating 
factory + RCP M = L->template Get>(name, &*levelManagers_[data[i]]->GetFactory(name)); + if (!M.is_null()) { + Xpetra::IO::Write(fileName, *M); } - else if (L->IsAvailable(name)) { - // Try nofactory - RCP M = L->template Get< RCP >(name); - if (!M.is_null()) { - Xpetra::IO::Write(fileName,* M); - } + } else if (L->IsAvailable(name)) { + // Try nofactory + RCP M = L->template Get>(name); + if (!M.is_null()) { + Xpetra::IO::Write(fileName, *M); } } } } - - void WriteDataAggregates(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - const std::string fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - - // NOTE: Aggregates use the next level's factory - RCP agg; - if(data[i]+1 < H.GetNumLevels() && L->IsAvailable(name,&*levelManagers_[data[i]+1]->GetFactory(name))) { - // Try generating factory - agg = L->template Get< RCP >(name,&*levelManagers_[data[i]+1]->GetFactory(name)); - } - else if (L->IsAvailable(name)) { - agg = L->template Get >("Aggregates"); - } - if(!agg.is_null()) { - std::ofstream ofs(fileName); - Teuchos::FancyOStream fofs(rcp(&ofs,false)); - agg->print(fofs,Teuchos::VERB_EXTREME); - } + } + + void WriteDataAggregates(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + const std::string fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + + // NOTE: Aggregates use the next level's factory + RCP agg; + if (data[i] + 1 < H.GetNumLevels() && L->IsAvailable(name, &*levelManagers_[data[i] + 1]->GetFactory(name))) { + // Try generating factory + agg = L->template Get>(name, &*levelManagers_[data[i] + 1]->GetFactory(name)); + } else if (L->IsAvailable(name)) { + agg = L->template Get>("Aggregates"); + } + if (!agg.is_null()) { + std::ofstream ofs(fileName); + 
Teuchos::FancyOStream fofs(rcp(&ofs, false)); + agg->print(fofs, Teuchos::VERB_EXTREME); } } } - - template - void WriteDataFC(Hierarchy& H, const Teuchos::Array& data, const std::string& name, const std::string & ofname) const { - for (int i = 0; i < data.size(); ++i) { - const std::string fileName = ofname + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - - if (L->IsAvailable(name)) { - RCP M = L->template Get< RCP >(name); - if (!M.is_null()) { - RCP A = L->template Get >("A"); - RCP AG = A->getCrsGraph(); - WriteFieldContainer(fileName,*M,*AG->getColMap()); - } + } + + template + void WriteDataFC(Hierarchy& H, const Teuchos::Array& data, const std::string& name, const std::string& ofname) const { + for (int i = 0; i < data.size(); ++i) { + const std::string fileName = ofname + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + + if (L->IsAvailable(name)) { + RCP M = L->template Get>(name); + if (!M.is_null()) { + RCP A = L->template Get>("A"); + RCP AG = A->getCrsGraph(); + WriteFieldContainer(fileName, *M, *AG->getColMap()); } } } } + } - // For dumping an IntrepidPCoarsening element-to-node map to disk - template - void WriteFieldContainer(const std::string& fileName, T & fcont,const Map &colMap) const { - - size_t num_els = (size_t) fcont.extent(0); - size_t num_vecs =(size_t) fcont.extent(1); + // For dumping an IntrepidPCoarsening element-to-node map to disk + template + void WriteFieldContainer(const std::string& fileName, T& fcont, const Map& colMap) const { + size_t num_els = (size_t)fcont.extent(0); + size_t num_vecs = (size_t)fcont.extent(1); - // Generate rowMap - Teuchos::RCP rowMap = Xpetra::MapFactory::Build(colMap.lib(),Teuchos::OrdinalTraits::invalid(),fcont.extent(0),colMap.getIndexBase(),colMap.getComm()); + // Generate rowMap + Teuchos::RCP rowMap = Xpetra::MapFactory::Build(colMap.lib(), 
Teuchos::OrdinalTraits::invalid(), fcont.extent(0), colMap.getIndexBase(), colMap.getComm()); - // Fill multivector to use *petra dump routines - RCP vec = Xpetra::MultiVectorFactory::Build(rowMap,num_vecs); + // Fill multivector to use *petra dump routines + RCP vec = Xpetra::MultiVectorFactory::Build(rowMap, num_vecs); - for(size_t j=0; j v = vec->getDataNonConst(j); - for(size_t i=0; i::Write(fileName,*vec); + for (size_t j = 0; j < num_vecs; j++) { + Teuchos::ArrayRCP v = vec->getDataNonConst(j); + for (size_t i = 0; i < num_els; i++) + v[i] = colMap.getGlobalElement(fcont(i, j)); } + Xpetra::IO::Write(fileName, *vec); + } + // Levels + Array> levelManagers_; // one FactoryManager per level (the last levelManager is used for all the remaining levels) - // Levels - Array > levelManagers_; // one FactoryManager per level (the last levelManager is used for all the remaining levels) - - }; // class HierarchyManager +}; // class HierarchyManager -} // namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHYMANAGER_SHORT -#endif // MUELU_HIERARCHYMANAGER_HPP +#endif // MUELU_HIERARCHYMANAGER_HPP -//TODO: split into _decl/_def -// TODO: default value for first param (FactoryManager()) should not be duplicated (code maintainability) +// TODO: split into _decl/_def +// TODO: default value for first param (FactoryManager()) should not be duplicated (code maintainability) diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp index a2c64571b2a3..6edaeabbc366 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp @@ -46,452 +46,503 @@ #include "MueLu_ConfigDefs.hpp" #if defined(HAVE_MUELU_ML) -# include -# if defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) -# include -# include // for default values -# include -# endif +#include +#if defined(HAVE_ML_EPETRA) && 
defined(HAVE_ML_TEUCHOS) +#include +#include // for default values +#include +#endif #endif #include namespace MueLu { +std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value) { + TEUCHOS_TEST_FOR_EXCEPTION(pname != "coarse: type" && pname != "coarse: list" && pname != "smoother: type" && pname.find("smoother: list", 0) != 0, + Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", \"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " + "supported as ML parameters for transformation of smoother/solver parameters to MueLu"); + + // string stream containing the smoother/solver xml parameters + std::stringstream mueluss; + + // Check whether we are dealing with coarse level (solver) parameters or level smoother parameters + std::string mode = "smoother:"; + bool is_coarse = false; + if (pname.find("coarse:", 0) == 0) { + mode = "coarse:"; + is_coarse = true; + } - std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value) { - - TEUCHOS_TEST_FOR_EXCEPTION(pname != "coarse: type" && pname != "coarse: list" && pname != "smoother: type" && pname.find("smoother: list",0) != 0, - Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", \"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " - "supported as ML parameters for transformation of smoother/solver parameters to MueLu"); + // check whether pre and/or post smoothing + std::string PreOrPost = "both"; + if (paramList.isParameter(mode + " pre or post")) + PreOrPost = paramList.get(mode + " pre or post"); + + TEUCHOS_TEST_FOR_EXCEPTION(mode == "coarse:" && PreOrPost != "both", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): 
The parameter \"coarse: pre or post\" is not supported by MueLu. " + "It does not make sense for direct solvers. For iterative solvers you obtain the same effect by increasing, " + "e.g., the number of sweeps for the coarse grid smoother. Please remove it from your parameters."); + + // select smoother type + std::string valuestr = value; // temporary variable + std::transform(valuestr.begin(), valuestr.end(), valuestr.begin(), ::tolower); + if (valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel") { + std::string my_name; + if (PreOrPost == "both") + my_name = "\"" + pname + "\""; + else + my_name = "\"smoother: " + PreOrPost + " type\""; + mueluss << "" << std::endl; + + } else if (valuestr == "hiptmair") { + std::string my_name; + if (PreOrPost == "both") + my_name = "\"" + pname + "\""; + else + my_name = "\"smoother: " + PreOrPost + " type\""; + mueluss << "" << std::endl; + + } else if (valuestr == "ifpack") { + std::string my_name = "\"" + pname + "\""; + if (paramList.isParameter("smoother: ifpack type")) { + if (paramList.get("smoother: ifpack type") == "ILU") { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: ifpack type", false); + } + if (paramList.get("smoother: ifpack type") == "ILUT") { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: ifpack type", false); + } + } - + } else if ((valuestr == "chebyshev") || (valuestr == "mls")) { + std::string my_name = "\"" + pname + "\""; + mueluss << "" << std::endl; - // string stream containing the smoother/solver xml parameters - std::stringstream mueluss; + } else if (valuestr.length() > strlen("amesos") && valuestr.substr(0, strlen("amesos")) == "amesos") { /* catch Amesos-* */ + std::string solverType = valuestr.substr(strlen("amesos") + 1); /* ("amesos-klu" -> "klu") */ - // Check whether we are dealing with coarse level (solver) parameters or level smoother parameters - std::string mode = "smoother:"; - bool is_coarse = false; - if 
(pname.find("coarse:", 0) == 0) { - mode = "coarse:"; - is_coarse = true; - } + bool valid = false; + const int validatorSize = 5; + std::string validator[validatorSize] = {"superlu", "superludist", "klu", "umfpack", "mumps"}; + for (int i = 0; i < validatorSize; i++) + if (validator[i] == solverType) + valid = true; + TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown smoother type. '" << solverType << "' not supported."); - // check whether pre and/or post smoothing - std::string PreOrPost = "both"; - if (paramList.isParameter(mode + " pre or post")) - PreOrPost = paramList.get(mode + " pre or post"); - - TEUCHOS_TEST_FOR_EXCEPTION(mode == "coarse:" && PreOrPost != "both", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: pre or post\" is not supported by MueLu. " - "It does not make sense for direct solvers. For iterative solvers you obtain the same effect by increasing, " - "e.g., the number of sweeps for the coarse grid smoother. 
Please remove it from your parameters."); - - // select smoother type - std::string valuestr = value; // temporary variable - std::transform(valuestr.begin(), valuestr.end(), valuestr.begin(), ::tolower); - if ( valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel" ) { - std::string my_name; - if ( PreOrPost == "both" ) my_name = "\"" + pname + "\""; - else my_name = "\"smoother: " + PreOrPost + " type\""; - mueluss << "" << std::endl; - - } else if ( valuestr == "hiptmair" ) { - std::string my_name; - if ( PreOrPost == "both" ) my_name = "\"" + pname + "\""; - else my_name = "\"smoother: " + PreOrPost + " type\""; - mueluss << "" << std::endl; - - } else if ( valuestr == "ifpack" ) { - std::string my_name = "\"" + pname + "\""; - if ( paramList.isParameter("smoother: ifpack type") ) { - if ( paramList.get("smoother: ifpack type") == "ILU" ) { - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: ifpack type",false); - } - if ( paramList.get("smoother: ifpack type") == "ILUT" ) { - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: ifpack type",false); - } - } + mueluss << "" << std::endl; - } else if (( valuestr == "chebyshev" ) || ( valuestr == "mls" )) { - std::string my_name = "\"" + pname + "\""; - mueluss << "" << std::endl; + } else { + // TODO error message + std::cout << "error in " << __FILE__ << ":" << __LINE__ << " could not find valid smoother/solver" << std::endl; + } - } else if (valuestr.length() > strlen("amesos") && valuestr.substr(0, strlen("amesos")) == "amesos") { /* catch Amesos-* */ - std::string solverType = valuestr.substr(strlen("amesos")+1); /* ("amesos-klu" -> "klu") */ + // set smoother: pre or post parameter + // Note that there is no "coarse: pre or post" in MueLu! 
+ if (paramList.isParameter("smoother: pre or post") && mode == "smoother:") { + // std::cout << "paramList" << paramList << std::endl; + // std::string smootherPreOrPost = paramList.get("smoother: pre or post"); + // std::cout << "Create pre or post parameter with " << smootherPreOrPost << std::endl; + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: pre or post", false); + } - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"superlu", "superludist", "klu", "umfpack", "mumps"}; - for (int i=0; i < validatorSize; i++) - if (validator[i] == solverType) - valid = true; - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter: unknown smoother type. '" << solverType << "' not supported."); + // create smoother parameter list + if (PreOrPost != "both") { + mueluss << "" << std::endl; + } else { + mueluss << "" << std::endl; + } - mueluss << "" << std::endl; + // relaxation based smoothers: - } else { - // TODO error message - std::cout << "error in " << __FILE__ << ":" << __LINE__ << " could not find valid smoother/solver" << std::endl; + if (valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel") { + if (valuestr == "jacobi") { + mueluss << "" << std::endl; + adaptingParamList.remove("relaxation: type", false); + } + if (valuestr == "gauss-seidel") { + mueluss << "" << std::endl; + adaptingParamList.remove("relaxation: type", false); + } + if (valuestr == "symmetric gauss-seidel") { + mueluss << "" << std::endl; + adaptingParamList.remove("relaxation: type", false); } - // set smoother: pre or post parameter - // Note that there is no "coarse: pre or post" in MueLu! 
- if ( paramList.isParameter("smoother: pre or post") && mode == "smoother:") { - //std::cout << "paramList" << paramList << std::endl; - //std::string smootherPreOrPost = paramList.get("smoother: pre or post"); - //std::cout << "Create pre or post parameter with " << smootherPreOrPost << std::endl; - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: pre or post",false); + if (paramList.isParameter("smoother: sweeps")) { + mueluss << "("smoother: sweeps") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: sweeps", false); + } + if (paramList.isParameter("smoother: damping factor")) { + mueluss << "("smoother: damping factor") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: damping factor", false); + } + if (paramList.isParameter("smoother: use l1 Gauss-Seidel")) { + mueluss << "("smoother: use l1 Gauss-Seidel") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: use l1 Gauss-Seidel", false); } + } - // create smoother parameter list - if (PreOrPost != "both") { - mueluss << "" << std::endl; + // Chebyshev + if (valuestr == "chebyshev") { + if (paramList.isParameter("smoother: polynomial order")) { + mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: polynomial order", false); } else { - mueluss << "" << std::endl; + mueluss << "" << std::endl; } - - // relaxation based smoothers: - - if ( valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel" ) { - if ( valuestr == "jacobi" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - if ( valuestr == "gauss-seidel" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - if ( valuestr == "symmetric gauss-seidel" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - - if ( paramList.isParameter("smoother: sweeps") ) { mueluss << "("smoother: sweeps") << "\"/>" << std::endl; 
adaptingParamList.remove("smoother: sweeps",false); } - if ( paramList.isParameter("smoother: damping factor") ) { mueluss << "("smoother: damping factor") << "\"/>" << std::endl; adaptingParamList.remove("smoother: damping factor",false); } - if ( paramList.isParameter("smoother: use l1 Gauss-Seidel") ) { mueluss << "("smoother: use l1 Gauss-Seidel") << "\"/>" << std::endl; adaptingParamList.remove("smoother: use l1 Gauss-Seidel",false); } + if (paramList.isParameter("smoother: Chebyshev alpha")) { + mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); + } else { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); } - - // Chebyshev - if ( valuestr == "chebyshev") { - if ( paramList.isParameter("smoother: polynomial order") ) { mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: polynomial order",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("smoother: Chebyshev alpha") ) { mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - else { mueluss << "" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - if ( paramList.isParameter("eigen-analysis: type") ) { mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; adaptingParamList.remove("eigen-analysis: type",false); } - else { mueluss << "" << std::endl; } + if (paramList.isParameter("eigen-analysis: type")) { + mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; + adaptingParamList.remove("eigen-analysis: type", false); + } else { + mueluss << "" << std::endl; } + } - // MLS - if ( valuestr == "mls") { - if ( paramList.isParameter("smoother: MLS polynomial order") ) { mueluss << "("smoother: MLS polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: MLS polynomial order",false); 
} - else if ( paramList.isParameter("smoother: polynomial order") ) { mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: polynomial order",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("smoother: MLS alpha") ) { mueluss << "("smoother: MLS alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: MLS alpha",false); } - else if ( paramList.isParameter("smoother: Chebyshev alpha") ) { mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("eigen-analysis: type") ) { mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; adaptingParamList.remove("eigen-analysis: type",false); } - else { mueluss << "" << std::endl; } + // MLS + if (valuestr == "mls") { + if (paramList.isParameter("smoother: MLS polynomial order")) { + mueluss << "("smoother: MLS polynomial order") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: MLS polynomial order", false); + } else if (paramList.isParameter("smoother: polynomial order")) { + mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: polynomial order", false); + } else { + mueluss << "" << std::endl; + } + if (paramList.isParameter("smoother: MLS alpha")) { + mueluss << "("smoother: MLS alpha") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: MLS alpha", false); + } else if (paramList.isParameter("smoother: Chebyshev alpha")) { + mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); + } else { + mueluss << "" << std::endl; + } + if (paramList.isParameter("eigen-analysis: type")) { + mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; + adaptingParamList.remove("eigen-analysis: type", false); + } else { + mueluss << "" << std::endl; } + } - if 
( valuestr == "hiptmair" ) { - std::string subSmootherType = "Chebyshev"; - if (!is_coarse && paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - if (is_coarse && paramList.isParameter("smoother: subsmoother type")) - subSmootherType = paramList.get("smoother: subsmoother type"); - - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListTranslator: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); - - mueluss << "" << std::endl; - mueluss << "" << std::endl; - - mueluss << "" << std::endl; - if (subSmootherType == "Chebyshev") { - std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; - std::string cheby_alpha = is_coarse ? "smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; - - if (paramList.isParameter(edge_sweeps)) { - mueluss << "(edge_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: edge sweeps", false); - } - if (paramList.isParameter(cheby_alpha)) { - mueluss << "(cheby_alpha) << "\"/>" << std::endl; - } - } else { - std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; - std::string SGS_damping = is_coarse ? 
"smoother: SGS damping factor" : "subsmoother: SGS damping factor"; - - if (paramList.isParameter(edge_sweeps)) { - mueluss << "" << std::endl; - mueluss << "(edge_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove(edge_sweeps, false); - } - if (paramList.isParameter(SGS_damping)) { - mueluss << "(SGS_damping) << "\"/>" << std::endl; - } + if (valuestr == "hiptmair") { + std::string subSmootherType = "Chebyshev"; + if (!is_coarse && paramList.isParameter("subsmoother: type")) + subSmootherType = paramList.get("subsmoother: type"); + if (is_coarse && paramList.isParameter("smoother: subsmoother type")) + subSmootherType = paramList.get("smoother: subsmoother type"); + + std::string subSmootherIfpackType; + if (subSmootherType == "Chebyshev") + subSmootherIfpackType = "CHEBYSHEV"; + else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { + if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME + subSmootherIfpackType = "RELAXATION"; + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListTranslator: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); + + mueluss << "" << std::endl; + mueluss << "" << std::endl; + + mueluss << "" << std::endl; + if (subSmootherType == "Chebyshev") { + std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; + std::string cheby_alpha = is_coarse ? "smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; + + if (paramList.isParameter(edge_sweeps)) { + mueluss << "(edge_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: edge sweeps", false); } - mueluss << "" << std::endl; - - mueluss << "" << std::endl; - if (subSmootherType == "Chebyshev") { - std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; - std::string cheby_alpha = is_coarse ? 
"smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; - if (paramList.isParameter(node_sweeps)) { - mueluss << "(node_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: node sweeps", false); - } - if (paramList.isParameter(cheby_alpha)) { - mueluss << "(cheby_alpha) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: Chebyshev alpha", false); - } - } else { - std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; - std::string SGS_damping = is_coarse ? "smoother: SGS damping factor" : "subsmoother: SGS damping factor"; - - if (paramList.isParameter(node_sweeps)) { - mueluss << "" << std::endl; - mueluss << "(node_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: node sweeps", false); - } - if (paramList.isParameter(SGS_damping)) { - mueluss << "(SGS_damping) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: SGS damping factor", false); - } + if (paramList.isParameter(cheby_alpha)) { + mueluss << "(cheby_alpha) << "\"/>" << std::endl; } - mueluss << "" << std::endl; + } else { + std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; + std::string SGS_damping = is_coarse ? "smoother: SGS damping factor" : "subsmoother: SGS damping factor"; + if (paramList.isParameter(edge_sweeps)) { + mueluss << "" << std::endl; + mueluss << "(edge_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove(edge_sweeps, false); + } + if (paramList.isParameter(SGS_damping)) { + mueluss << "(SGS_damping) << "\"/>" << std::endl; + } } + mueluss << "" << std::endl; - // parameters for ILU based preconditioners - if ( valuestr == "ifpack") { + mueluss << "" << std::endl; + if (subSmootherType == "Chebyshev") { + std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; + std::string cheby_alpha = is_coarse ? 
"smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; + if (paramList.isParameter(node_sweeps)) { + mueluss << "(node_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: node sweeps", false); + } + if (paramList.isParameter(cheby_alpha)) { + mueluss << "(cheby_alpha) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: Chebyshev alpha", false); + } + } else { + std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; + std::string SGS_damping = is_coarse ? "smoother: SGS damping factor" : "subsmoother: SGS damping factor"; - // add Ifpack parameters - if ( paramList.isParameter("smoother: ifpack overlap") ) { mueluss << "("smoother: ifpack overlap") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack overlap",false); } - if ( paramList.isParameter("smoother: ifpack level-of-fill") ) { mueluss << "("smoother: ifpack level-of-fill") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack level-of-fill",false); } - if ( paramList.isParameter("smoother: ifpack absolute threshold") ) { mueluss << "("smoother: ifpack absolute threshold") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack absolute threshold",false); } - if ( paramList.isParameter("smoother: ifpack relative threshold") ) { mueluss << "("smoother: ifpack relative threshold") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack relative threshold",false); } + if (paramList.isParameter(node_sweeps)) { + mueluss << "" << std::endl; + mueluss << "(node_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: node sweeps", false); + } + if (paramList.isParameter(SGS_damping)) { + mueluss << "(SGS_damping) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: SGS damping factor", false); + } } - mueluss << "" << std::endl; + } - // max coarse level size parameter (outside of smoother parameter lists) - if ( paramList.isParameter("smoother: max size") ) { - 
mueluss << "("smoother: max size") << "\"/>" << std::endl; adaptingParamList.remove("smoother: max size",false); + // parameters for ILU based preconditioners + if (valuestr == "ifpack") { + // add Ifpack parameters + if (paramList.isParameter("smoother: ifpack overlap")) { + mueluss << "("smoother: ifpack overlap") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack overlap", false); + } + if (paramList.isParameter("smoother: ifpack level-of-fill")) { + mueluss << "("smoother: ifpack level-of-fill") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack level-of-fill", false); + } + if (paramList.isParameter("smoother: ifpack absolute threshold")) { + mueluss << "("smoother: ifpack absolute threshold") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack absolute threshold", false); + } + if (paramList.isParameter("smoother: ifpack relative threshold")) { + mueluss << "("smoother: ifpack relative threshold") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack relative threshold", false); } - - return mueluss.str(); } - std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList & paramList_in, const std::string& defaultVals) { - Teuchos::ParameterList paramList = paramList_in; - - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) - -#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) + mueluss << "" << std::endl; - // TODO alternative with standard parameterlist from ML user guide? 
+ // max coarse level size parameter (outside of smoother parameter lists) + if (paramList.isParameter("smoother: max size")) { + mueluss << "("smoother: max size") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: max size", false); + } - if (defaultVals != "") { - TEUCHOS_TEST_FOR_EXCEPTION(defaultVals!="SA" && defaultVals!="NSSA" && defaultVals!="refmaxwell" && defaultVals!="Maxwell", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter: only \"SA\", \"NSSA\", \"refmaxwell\" and \"Maxwell\" allowed as options for ML default parameters."); - Teuchos::ParameterList ML_defaultlist; - if (defaultVals == "refmaxwell") - ML_Epetra::SetDefaultsRefMaxwell(ML_defaultlist); - else - ML_Epetra::SetDefaults(defaultVals,ML_defaultlist); + return mueluss.str(); +} - // merge user parameters with default parameters - MueLu::MergeParameterList(paramList_in, ML_defaultlist, true); - paramList = ML_defaultlist; - } -#else - if (defaultVals != "") { - // If no validator available: issue a warning and set parameter value to false in the output list - *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. No ML default values available." 
<< std::endl; - } -#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS +std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList& paramList_in, const std::string& defaultVals) { + Teuchos::ParameterList paramList = paramList_in; - // - // Move smoothers/aggregation/coarse parameters to sublists - // + RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); +#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) - paramList = paramListWithSubList; // swap - Teuchos::ParameterList adaptingParamList = paramList; // copy of paramList which is used to removed already interpreted parameters + // TODO alternative with standard parameterlist from ML user guide? 
- // - // Validate parameter list - // - { - bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate && defaultVals!="refmaxwell") { + if (defaultVals != "") { + TEUCHOS_TEST_FOR_EXCEPTION(defaultVals != "SA" && defaultVals != "NSSA" && defaultVals != "refmaxwell" && defaultVals != "Maxwell", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: only \"SA\", \"NSSA\", \"refmaxwell\" and \"Maxwell\" allowed as options for ML default parameters."); + Teuchos::ParameterList ML_defaultlist; + if (defaultVals == "refmaxwell") + ML_Epetra::SetDefaultsRefMaxwell(ML_defaultlist); + else + ML_Epetra::SetDefaults(defaultVals, ML_defaultlist); + // merge user parameters with default parameters + MueLu::MergeParameterList(paramList_in, ML_defaultlist, true); + paramList = ML_defaultlist; + } +#else + if (defaultVals != "") { + // If no validator available: issue a warning and set parameter value to false in the output list + *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. No ML default values available." 
<< std::endl; + } +#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: + // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + + paramList = paramListWithSubList; // swap + Teuchos::ParameterList adaptingParamList = paramList; // copy of paramList which is used to removed already interpreted parameters + + // + // Validate parameter list + // + { + bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ + if (validate && defaultVals != "refmaxwell") { #if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(! ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); + // Validate parameter list using ML validator + int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ + TEUCHOS_TEST_FOR_EXCEPTION(!ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, + "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); #else - // If no validator available: issue a warning and set parameter value to false in the output list - *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. The parameter list cannot be validated." 
<< std::endl; - paramList.set("ML validate parameter list", false); - -#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS - } // if(validate) - } // scope - - - { - // Special handling of ML's aux aggregation - // - // In ML, when "aggregation: aux: enable" == true, the threshold - // is set via "aggregation: aux: threshold" instead of - // "aggregation: threshold". In MueLu, we use "aggregation: drop - // tol" regardless of "sa: use filtering". So depending on - // "aggregation: aux: enable" we use either one or the other - // threshold to set "aggregation: drop tol". - if (paramListWithSubList.isParameter("aggregation: aux: enable") && paramListWithSubList.get("aggregation: aux: enable")) { - if (paramListWithSubList.isParameter("aggregation: aux: threshold")) { - paramListWithSubList.set("aggregation: threshold", paramListWithSubList.get("aggregation: aux: threshold")); - paramListWithSubList.remove("aggregation: aux: threshold"); - } - } - } - - // stringstream for concatenating xml parameter strings. - std::stringstream mueluss; + // If no validator available: issue a warning and set parameter value to false in the output list + *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. The parameter list cannot be validated." << std::endl; + paramList.set("ML validate parameter list", false); - // create surrounding MueLu parameter list - mueluss << "" << std::endl; +#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + } // if(validate) + } // scope - // make sure that MueLu's phase1 matches ML's - mueluss << "" << std::endl; - - // make sure that MueLu's phase2a matches ML's - mueluss << "" << std::endl; + { + // Special handling of ML's aux aggregation + // + // In ML, when "aggregation: aux: enable" == true, the threshold + // is set via "aggregation: aux: threshold" instead of + // "aggregation: threshold". In MueLu, we use "aggregation: drop + // tol" regardless of "sa: use filtering". 
So depending on + // "aggregation: aux: enable" we use either one or the other + // threshold to set "aggregation: drop tol". + if (paramListWithSubList.isParameter("aggregation: aux: enable") && paramListWithSubList.get("aggregation: aux: enable")) { + if (paramListWithSubList.isParameter("aggregation: aux: threshold")) { + paramListWithSubList.set("aggregation: threshold", paramListWithSubList.get("aggregation: aux: threshold")); + paramListWithSubList.remove("aggregation: aux: threshold"); + } + } + } - // make sure that MueLu's phase2b matches ML's - mueluss << "" << std::endl; + // stringstream for concatenating xml parameter strings. + std::stringstream mueluss; - // make sure that MueLu's drop tol matches ML's - mueluss << "" << std::endl; + // create surrounding MueLu parameter list + mueluss << "" << std::endl; + // make sure that MueLu's phase1 matches ML's + mueluss << "" << std::endl; - // loop over all ML parameters in provided parameter list - for (ParameterList::ConstIterator param = paramListWithSubList.begin(); param != paramListWithSubList.end(); ++param) { + // make sure that MueLu's phase2a matches ML's + mueluss << "" << std::endl; - // extract ML parameter name - const std::string & pname=paramListWithSubList.name(param); + // make sure that MueLu's phase2b matches ML's + mueluss << "" << std::endl; - // extract corresponding (ML) value - // remove ParameterList specific information from result string - std::stringstream valuess; - valuess << paramList.entry(param); - std::string valuestr = valuess.str(); - replaceAll(valuestr, "[unused]", ""); - replaceAll(valuestr, "[default]", ""); - valuestr = trim(valuestr); + // make sure that MueLu's drop tol matches ML's + mueluss << "" << std::endl; - // transform ML parameter to corresponding MueLu parameter and generate XML string - std::string valueInterpreterStr = "\"" + valuestr + "\""; - std::string ret = MasterList::interpretParameterName(MasterList::ML2MueLu(pname),valueInterpreterStr); + // 
loop over all ML parameters in provided parameter list + for (ParameterList::ConstIterator param = paramListWithSubList.begin(); param != paramListWithSubList.end(); ++param) { + // extract ML parameter name + const std::string& pname = paramListWithSubList.name(param); - if ((pname == "aggregation: aux: enable") && (paramListWithSubList.get("aggregation: aux: enable"))) { - mueluss << "" << std::endl; - } + // extract corresponding (ML) value + // remove ParameterList specific information from result string + std::stringstream valuess; + valuess << paramList.entry(param); + std::string valuestr = valuess.str(); + replaceAll(valuestr, "[unused]", ""); + replaceAll(valuestr, "[default]", ""); + valuestr = trim(valuestr); - // special handling for verbosity level - if (pname == "ML output") { - // Translate verbosity parameter - int verbosityLevel = std::stoi(valuestr); - std::string eVerbLevel = "none"; - if (verbosityLevel == 0) eVerbLevel = "none"; - if (verbosityLevel >= 1) eVerbLevel = "low"; - if (verbosityLevel >= 5) eVerbLevel = "medium"; - if (verbosityLevel >= 10) eVerbLevel = "high"; - if (verbosityLevel >= 11) eVerbLevel = "extreme"; - if (verbosityLevel >= 42) eVerbLevel = "test"; - if (verbosityLevel >= 666) eVerbLevel = "interfacetest"; - mueluss << "" << std::endl; - continue; - } + // transform ML parameter to corresponding MueLu parameter and generate XML string + std::string valueInterpreterStr = "\"" + valuestr + "\""; + std::string ret = MasterList::interpretParameterName(MasterList::ML2MueLu(pname), valueInterpreterStr); - // add XML string - if (ret != "") { - mueluss << ret << std::endl; + if ((pname == "aggregation: aux: enable") && (paramListWithSubList.get("aggregation: aux: enable"))) { + mueluss << "" << std::endl; + } - // remove parameter from ML parameter list - adaptingParamList.remove(pname,false); - } + // special handling for verbosity level + if (pname == "ML output") { + // Translate verbosity parameter + int verbosityLevel = 
std::stoi(valuestr); + std::string eVerbLevel = "none"; + if (verbosityLevel == 0) eVerbLevel = "none"; + if (verbosityLevel >= 1) eVerbLevel = "low"; + if (verbosityLevel >= 5) eVerbLevel = "medium"; + if (verbosityLevel >= 10) eVerbLevel = "high"; + if (verbosityLevel >= 11) eVerbLevel = "extreme"; + if (verbosityLevel >= 42) eVerbLevel = "test"; + if (verbosityLevel >= 666) eVerbLevel = "interfacetest"; + mueluss << "" << std::endl; + continue; + } - // special handling for energy minimization - // TAW: this is not optimal for symmetric problems but at least works. - // for symmetric problems the "energy minimization" parameter should not exist anyway... - if (pname == "energy minimization: enable") { - mueluss << "" << std::endl; - mueluss << "" << std::endl; - } + // add XML string + if (ret != "") { + mueluss << ret << std::endl; - // special handling for smoothers - if (pname == "smoother: type") { + // remove parameter from ML parameter list + adaptingParamList.remove(pname, false); + } - mueluss << GetSmootherFactory(paramList, adaptingParamList, pname, valuestr); + // special handling for energy minimization + // TAW: this is not optimal for symmetric problems but at least works. + // for symmetric problems the "energy minimization" parameter should not exist anyway... 
+ if (pname == "energy minimization: enable") { + mueluss << "" << std::endl; + mueluss << "" << std::endl; + } - } + // special handling for smoothers + if (pname == "smoother: type") { + mueluss << GetSmootherFactory(paramList, adaptingParamList, pname, valuestr); + } - // special handling for level-specific smoothers - if (pname.find("smoother: list (level",0) == 0) { - // Scan pname (ex: pname="smoother: type (level 2)") - std::string type, option; - int levelID=-1; - { - typedef Teuchos::ArrayRCP::size_type size_type; - Teuchos::Array ctype (size_type(pname.size()+1)); - Teuchos::Array coption(size_type(pname.size()+1)); - - int matched = sscanf(pname.c_str(),"%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") - type = std::string(ctype.getRawPtr()); - option = std::string(coption.getRawPtr()); option.resize(option.size () - 1); // remove final white-space - - if (matched != 3 || (type != "smoother:")) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". 
" - << "Error in creating level-specific sublists" << std::endl - << "Offending parameter: " << pname << std::endl); - } - - mueluss << "" << std::endl; - mueluss << GetSmootherFactory(paramList.sublist(pname),adaptingParamList.sublist(pname), "smoother: type", paramList.sublist(pname).get("smoother: type")); - mueluss << "" << std::endl; + // special handling for level-specific smoothers + if (pname.find("smoother: list (level", 0) == 0) { + // Scan pname (ex: pname="smoother: type (level 2)") + std::string type, option; + int levelID = -1; + { + typedef Teuchos::ArrayRCP::size_type size_type; + Teuchos::Array ctype(size_type(pname.size() + 1)); + Teuchos::Array coption(size_type(pname.size() + 1)); + + int matched = sscanf(pname.c_str(), "%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") + type = std::string(ctype.getRawPtr()); + option = std::string(coption.getRawPtr()); + option.resize(option.size() - 1); // remove final white-space + + if (matched != 3 || (type != "smoother:")) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". " + << "Error in creating level-specific sublists" << std::endl + << "Offending parameter: " << pname << std::endl); } - } - - // special handling for coarse level - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("coarse: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: type\" should not exist but being stored in \"coarse: list\" instead."); - if ( pname == "coarse: list" ) { - - // interpret smoother/coarse solver data. - // Note, that we inspect the "coarse: list" sublist to define the "coarse" smoother/solver - // Be aware, that MueLu::CreateSublists renames the prefix of the parameters in the "coarse: list" from "coarse" to "smoother". 
- // Therefore, we have to check the values of the "smoother" parameters - mueluss << GetSmootherFactory(paramList.sublist("coarse: list"), adaptingParamList.sublist("coarse: list"), "coarse: type", paramList.sublist("coarse: list").get("smoother: type")); - + mueluss << "" << std::endl; + mueluss << GetSmootherFactory(paramList.sublist(pname), adaptingParamList.sublist(pname), "smoother: type", paramList.sublist(pname).get("smoother: type")); + mueluss << "" << std::endl; } - } // for + } - mueluss << "" << std::endl; + // special handling for coarse level + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("coarse: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: type\" should not exist but being stored in \"coarse: list\" instead."); + if (pname == "coarse: list") { + // interpret smoother/coarse solver data. + // Note, that we inspect the "coarse: list" sublist to define the "coarse" smoother/solver + // Be aware, that MueLu::CreateSublists renames the prefix of the parameters in the "coarse: list" from "coarse" to "smoother". + // Therefore, we have to check the values of the "smoother" parameters + mueluss << GetSmootherFactory(paramList.sublist("coarse: list"), adaptingParamList.sublist("coarse: list"), "coarse: type", paramList.sublist("coarse: list").get("smoother: type")); + } + } // for - return mueluss.str(); - } + mueluss << "" << std::endl; + return mueluss.str(); +} -} // namespace MueLu +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp index 34697f6fd4e9..ee67d1a075d3 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp @@ -59,98 +59,95 @@ namespace MueLu { - /*! - @class ML2MueLuParameterTranslator class. 
- @brief Class that accepts ML-style parameters and builds a MueLu parameter list (easy input deck) - - This interpreter class is meant to make the transition from ML to MueLu easier. - */ - class ML2MueLuParameterTranslator { - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - ML2MueLuParameterTranslator() { } - - //! Destructor. - virtual ~ML2MueLuParameterTranslator() { } - - //@} - - //!@name Parameter translation from ML to MueLu - //@{ - - /// @brief: Translate ML parameters to MueLu parameter XML string - /// - /// @param [in] paramList_in: ML parameter list - /// @return std::string with MueLu XML parameters - static std::string translate(Teuchos::ParameterList & paramList, const std::string& defaultVals="") { - return SetParameterList(paramList, defaultVals); - } - - /// @brief: Translate ML parameters to MueLu parameter XML string - /// - /// @param [in] xmlFileName: file name with ML xml parameters - /// @return std::string with MueLu XML parameters - static std::string translate(const std::string & xmlFileName, const std::string& defaultVals="") { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - return SetParameterList(*paramList, defaultVals); - } - - //@} - - private: - - //! @name Parameter handling - //@{ - - /// @brief: Interpret parameter list - /// - /// @param [in] paramList_in: ML parameter list - /// @return std::string with MueLu XML parameters - static std::string SetParameterList(const Teuchos::ParameterList & paramList_in, const std::string& defaultVals); - - - /// @brief: Helper function which translates ML smoother/solver paramters to MueLu XML string - /// - /// @param [in] paramList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. - /// @param [in,out] adaptingParamList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. Note that the processed parameters are removed from the ParameterList. 
It can be used to detect non-interpreted ML parameters. - /// @param [in] pname: currently processed parameter TODO - /// @param [in] value: currently processed value TODO - static std::string GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value); - - //@} - - // - // helper routines - // - - // trim from start - static inline std::string <rim(std::string &s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c){return !std::isspace(c);})); - return s; - } - - // trim from end - static inline std::string &rtrim(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int c){return !std::isspace(c);}).base(), s.end()); - return s; - } - - // trim from both ends - static inline std::string &trim(std::string &s) { - return ltrim(rtrim(s)); - } - - //! @name Member variables - //@{ - //std::string xmlString_; ///! string containing MueLu XML parameters corresponding to ML parameters - //@} - - }; // class MLParameterListInterpreter - - -} // end namespace MueLu +/*! + @class ML2MueLuParameterTranslator class. + @brief Class that accepts ML-style parameters and builds a MueLu parameter list (easy input deck) + + This interpreter class is meant to make the transition from ML to MueLu easier. +*/ +class ML2MueLuParameterTranslator { + public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + ML2MueLuParameterTranslator() {} + + //! Destructor. 
+ virtual ~ML2MueLuParameterTranslator() {} + + //@} + + //!@name Parameter translation from ML to MueLu + //@{ + + /// @brief: Translate ML parameters to MueLu parameter XML string + /// + /// @param [in] paramList_in: ML parameter list + /// @return std::string with MueLu XML parameters + static std::string translate(Teuchos::ParameterList& paramList, const std::string& defaultVals = "") { + return SetParameterList(paramList, defaultVals); + } + + /// @brief: Translate ML parameters to MueLu parameter XML string + /// + /// @param [in] xmlFileName: file name with ML xml parameters + /// @return std::string with MueLu XML parameters + static std::string translate(const std::string& xmlFileName, const std::string& defaultVals = "") { + Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); + return SetParameterList(*paramList, defaultVals); + } + + //@} + + private: + //! @name Parameter handling + //@{ + + /// @brief: Interpret parameter list + /// + /// @param [in] paramList_in: ML parameter list + /// @return std::string with MueLu XML parameters + static std::string SetParameterList(const Teuchos::ParameterList& paramList_in, const std::string& defaultVals); + + /// @brief: Helper function which translates ML smoother/solver paramters to MueLu XML string + /// + /// @param [in] paramList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. + /// @param [in,out] adaptingParamList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. Note that the processed parameters are removed from the ParameterList. It can be used to detect non-interpreted ML parameters. 
+ /// @param [in] pname: currently processed parameter TODO + /// @param [in] value: currently processed value TODO + static std::string GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value); + + //@} + + // + // helper routines + // + + // trim from start + static inline std::string& ltrim(std::string& s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + return s; + } + + // trim from end + static inline std::string& rtrim(std::string& s) { + s.erase(std::find_if(s.rbegin(), s.rend(), [](int c) { return !std::isspace(c); }).base(), s.end()); + return s; + } + + // trim from both ends + static inline std::string& trim(std::string& s) { + return ltrim(rtrim(s)); + } + + //! @name Member variables + //@{ + // std::string xmlString_; ///! string containing MueLu XML parameters corresponding to ML parameters + //@} + +}; // class MLParameterListInterpreter + +} // end namespace MueLu #endif /* MUELU_ML2MUELUPARAMETERTRANSLATOR_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp index de92e1a77b90..97f721807e87 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp @@ -85,162 +85,155 @@ #include "MueLu_RebalanceMapFactory_fwd.hpp" #endif - #ifdef HAVE_MUELU_DEPRECATED_CODE #ifndef TRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS #warning "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" #endif -#else +#else #error "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" #endif - - - - namespace MueLu { - /* - Utility that from an existing Teuchos::ParameterList creates a new list, in - which level-specific parameters are replaced with sublists. 
- - Currently, level-specific parameters that begin with "smoother:" - or "aggregation:" are placed in sublists. Coarse options are also placed - in a coarse list. - - Example: - Input: - smoother: type (level 0) = symmetric Gauss-Seidel - smoother: sweeps (level 0) = 1 - Output: - smoother: list (level 0) -> - smoother: type = symmetric Gauss-Seidel - smoother: sweeps = 1 - */ - // This function is a copy of ML_CreateSublists to avoid dependency on ML - // Throw exception on error instead of exit() - void CreateSublists(const ParameterList &List, ParameterList &newList); - - - /*! - @class MLParameterListInterpreter class. - @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. - This interpreter uses the same default values as ML. This allows to compare ML/MueLu results - - The parameter list is validated only if the package ML is available and parameter "ML validate parameter list" is true. - TODO: A warning is issued if ML is not available - */ - - template - class MLParameterListInterpreter : public HierarchyManager { +/* + Utility that from an existing Teuchos::ParameterList creates a new list, in + which level-specific parameters are replaced with sublists. + + Currently, level-specific parameters that begin with "smoother:" + or "aggregation:" are placed in sublists. Coarse options are also placed + in a coarse list. + + Example: + Input: + smoother: type (level 0) = symmetric Gauss-Seidel + smoother: sweeps (level 0) = 1 + Output: + smoother: list (level 0) -> + smoother: type = symmetric Gauss-Seidel + smoother: sweeps = 1 +*/ +// This function is a copy of ML_CreateSublists to avoid dependency on ML +// Throw exception on error instead of exit() +void CreateSublists(const ParameterList& List, ParameterList& newList); + +/*! + @class MLParameterListInterpreter class. + @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. + This interpreter uses the same default values as ML. 
This allows to compare ML/MueLu results + + The parameter list is validated only if the package ML is available and parameter "ML validate parameter list" is true. + TODO: A warning is issued if ML is not available +*/ + +template +class MLParameterListInterpreter : public HierarchyManager { #undef MUELU_MLPARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - MLParameterListInterpreter() : nullspace_(NULL), blksize_(1) { } + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(Teuchos::ParameterList & paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); + //! Constructor. + MLParameterListInterpreter() + : nullspace_(NULL) + , blksize_(1) {} - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(const std::string & xmlFileName,std::vector > factoryList = std::vector >(0)); + //! Constructor. 
+ //! @param paramList: parameter list with ML parameters + //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); - //! Destructor. - virtual ~MLParameterListInterpreter() { } + //! Constructor. + //! @param xmlFileName: file name for XML file with ML parameters + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList = std::vector >(0)); - //@} + //! Destructor. + virtual ~MLParameterListInterpreter() {} - //@{ + //@} - void SetParameterList(const Teuchos::ParameterList & paramList); + //@{ - //@} + void SetParameterList(const Teuchos::ParameterList& paramList); - //@{ + //@} - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const; + //@{ - //@} + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const; - //@{ + //@} - //! @name static helper functions translating parameter list to factories - //! 
@brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects - //@{ + //@{ - //! Read smoother options and build the corresponding smoother factory - // @param AFact: Factory used by smoother to find 'A' - static RCP GetSmootherFactory(const Teuchos::ParameterList & paramList, const RCP & AFact = Teuchos::null); + //! @name static helper functions translating parameter list to factories + //! @brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects + //@{ - //@} + //! Read smoother options and build the corresponding smoother factory + // @param AFact: Factory used by smoother to find 'A' + static RCP GetSmootherFactory(const Teuchos::ParameterList& paramList, const RCP& AFact = Teuchos::null); + //@} - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. + //! @name Handling of additional user-specific transfer factories + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. - This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able - to add some factories that write out some debug information etc. which are not handled by the ML - Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - */ - void AddTransferFactory(const RCP & factory); - - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} - - private: + This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able + to add some factories that write out some debug information etc. which are not handled by the ML + Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + */ + void AddTransferFactory(const RCP& factory); - //! 
nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; //TODO: replace by Teuchos::ArrayRCP<> + //! Returns number of transfer factories. + size_t NumTransferFactories() const; + //@} - //! coordinates can be embedded in the ML parameter list - double* xcoord_; - double* ycoord_; - double* zcoord_; + private: + //! nullspace can be embedded in the ML parameter list + int nullspaceDim_; + double* nullspace_; // TODO: replace by Teuchos::ArrayRCP<> - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; + //! coordinates can be embedded in the ML parameter list + double* xcoord_; + double* ycoord_; + double* zcoord_; - //@{ Matrix configuration + //! list of user-defined transfer Factories + //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) + //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the + //! capabibilities of ML. + std::vector > TransferFacts_; - //! Setup Operator object - virtual void SetupOperator(Operator & Op) const; + //@{ Matrix configuration - //! Matrix configuration storage - int blksize_; + //! Setup Operator object + virtual void SetupOperator(Operator& Op) const; - //@} + //! 
Matrix configuration storage + int blksize_; - }; // class MLParameterListInterpreter + //@} -} // namespace MueLu +}; // class MLParameterListInterpreter +} // namespace MueLu #define MUELU_MLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp index 06f98e8dd2ad..c18c69acea82 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp @@ -100,7 +100,6 @@ //#include "MueLu_RebalanceMapFactory.hpp" #endif - // Note: do not add options that are only recognized by MueLu. // TODO: this parameter list interpreter should force MueLu to use default ML parameters @@ -108,675 +107,668 @@ // Read a parameter value from a parameter list and store it into a variable named 'varName' #define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); + varType varName = defaultValue; \ + if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); // Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) #define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else outParamList.set(outParamStr, static_cast(defaultValue)); \ + if (paramList.isParameter(paramStr)) \ + outParamList.set(outParamStr, paramList.get(paramStr)); \ + else \ + outParamList.set(outParamStr, static_cast(defaultValue)); namespace MueLu { - template - MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList & paramList, Teuchos::RCP > comm, std::vector > factoryList) : 
nullspace_(NULL), xcoord_(NULL), ycoord_(NULL), zcoord_(NULL),TransferFacts_(factoryList), blksize_(1) { - - if (paramList.isParameter("xml parameter file")){ - std::string filename = paramList.get("xml parameter file",""); - if (filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - Teuchos::ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2),*comm); - paramList2.remove("xml parameter file"); - SetParameterList(paramList2); - } - else - SetParameterList(paramList); - } - else +template +MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm, std::vector > factoryList) + : nullspace_(NULL) + , xcoord_(NULL) + , ycoord_(NULL) + , zcoord_(NULL) + , TransferFacts_(factoryList) + , blksize_(1) { + if (paramList.isParameter("xml parameter file")) { + std::string filename = paramList.get("xml parameter file", ""); + if (filename.length() != 0) { + TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); + Teuchos::ParameterList paramList2 = paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); + paramList2.remove("xml parameter file"); + SetParameterList(paramList2); + } else SetParameterList(paramList); + } else + SetParameterList(paramList); +} + +template +MLParameterListInterpreter::MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList) + : nullspace_(NULL) + , TransferFacts_(factoryList) + , blksize_(1) { + Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); + SetParameterList(*paramList); +} + +template +void MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList& paramList_in) { + Teuchos::ParameterList paramList = paramList_in; + + // + // Read top-level of the parameter list + // 
+ + // hard-coded default values == ML defaults according to the manual + MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); + MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); + MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + + MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + + MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); + // MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); + MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4 / (double)3, agg_damping); + // MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); + MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); + MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML + MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M + MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); + MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); + + MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); + MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation + MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + + MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + + MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML + + MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); + 
MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); + MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); + + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: + // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + + // pull out "use kokkos refactor" + bool setKokkosRefactor = false; + bool useKokkosRefactor = !Node::is_serial; + if (paramList.isType("use kokkos refactor")) { + useKokkosRefactor = paramList.get("use kokkos refactor"); + setKokkosRefactor = true; + paramList.remove("use kokkos refactor"); } - template - MLParameterListInterpreter::MLParameterListInterpreter(const std::string & xmlFileName, std::vector > factoryList) : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); - } - - template - void MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList & paramList_in) { - Teuchos::ParameterList paramList = paramList_in; + // + // Validate parameter list + // - // - // Read top-level of the parameter list - // - - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); - - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); - - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: 
damping factor", double, (double)4/(double)3, agg_damping); - //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); - MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML - MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M - MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); - MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); - - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation - - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + { + bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ + if (validate) { +#if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) + // Validate parameter list using ML validator + int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ + TEUCHOS_TEST_FOR_EXCEPTION(!ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, + "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); +#else + // If no validator available: issue a warning and set parameter value to false in the output list + this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." 
<< std::endl; + paramList.set("ML validate parameter list", false); + +#endif // HAVE_MUELU_ML + } // if(validate) + } // scope + + // Matrix option + blksize_ = nDofsPerNode; + + // Translate verbosity parameter + + // Translate verbosity parameter + MsgType eVerbLevel = None; + if (verbosityLevel == 0) eVerbLevel = None; + if (verbosityLevel >= 1) eVerbLevel = Low; + if (verbosityLevel >= 5) eVerbLevel = Medium; + if (verbosityLevel >= 10) eVerbLevel = High; + if (verbosityLevel >= 11) eVerbLevel = Extreme; + if (verbosityLevel >= 42) eVerbLevel = Test; + if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; + this->verbosity_ = eVerbLevel; + + TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::SetParameterList(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); + + // Create MueLu factories + RCP dropFact; + if (useKokkosRefactor) + dropFact = rcp(new CoalesceDropFactory_kokkos()); + else + dropFact = rcp(new CoalesceDropFactory()); + + if (agg_use_aux) { + dropFact->SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + dropFact->SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(agg_aux_thresh)); + } - MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML + // Uncoupled aggregation + RCP AggFact = Teuchos::null; + if (useKokkosRefactor) { + AggFact = rcp(new UncoupledAggregationFactory_kokkos()); + } else + AggFact = rcp(new UncoupledAggregationFactory()); + + AggFact->SetFactory("Graph", dropFact); + AggFact->SetFactory("DofsPerNode", dropFact); + AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); + AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); + AggFact->SetParameter("aggregation: max selected neighbors", 
Teuchos::ParameterEntry(maxNbrAlreadySelected)); + AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); + + if (verbosityLevel > 3) { + std::ostringstream oss; + oss << "========================= Aggregate option summary  =========================" << std::endl; + oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; + oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; + oss << "aggregate ordering :                    natural" << std::endl; + oss << "=============================================================================" << std::endl; + this->GetOStream(Runtime1) << oss.str(); + } - MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); - MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); - MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); + RCP PFact; + RCP RFact; + RCP PtentFact; + if (useKokkosRefactor) + PtentFact = rcp(new TentativePFactory_kokkos()); + else + PtentFact = rcp(new TentativePFactory()); + if (agg_damping == 0.0 && bEnergyMinimization == false) { + // tentative prolongation operator (PA-AMG) + PFact = PtentFact; + RFact = rcp(new TransPFactory()); + } else if (agg_damping != 0.0 && bEnergyMinimization == false) { + // smoothed aggregation (SA-AMG) + RCP SaPFact; + if (useKokkosRefactor) + SaPFact = rcp(new SaPFactory_kokkos()); + else + SaPFact = rcp(new SaPFactory()); + SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); + PFact = SaPFact; + RFact = rcp(new TransPFactory()); + } else if (bEnergyMinimization == true) { + // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) + PFact = rcp(new PgPFactory()); + RFact = rcp(new GenericRFactory()); + } + RCP AcFact = rcp(new RAPFactory()); + AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); + for (size_t i = 0; i < TransferFacts_.size(); i++) { + 
AcFact->AddTransferFactory(TransferFacts_[i]); + } + // + // introduce rebalancing + // +#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) + Teuchos::RCP RebalancedPFact = Teuchos::null; + Teuchos::RCP RebalancedRFact = Teuchos::null; + Teuchos::RCP RepartitionFact = Teuchos::null; + Teuchos::RCP RebalancedAFact = Teuchos::null; + + MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); + if (bDoRepartition == 1) { + // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default. + // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. + RFact->SetFactory("P", PFact); // - // Move smoothers/aggregation/coarse parameters to sublists - // + AcFact->SetFactory("P", PFact); + AcFact->SetFactory("R", RFact); - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // pull out "use kokkos refactor" - bool setKokkosRefactor = false; - bool useKokkosRefactor = !Node::is_serial; - if (paramList.isType("use kokkos refactor")) { - useKokkosRefactor = paramList.get("use kokkos refactor"); - setKokkosRefactor = true; - paramList.remove("use kokkos refactor"); - } + // define rebalancing factory for coarse matrix + Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); + rebAmalgFact->SetFactory("A", AcFact); - // - // Validate parameter list - // + MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); + MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); + // Repartitioning heuristic + RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); { - bool validate = 
paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate) { + Teuchos::ParameterList paramListRepFact; + paramListRepFact.set("repartition: min rows per proc", minperproc); + paramListRepFact.set("repartition: max imbalance", maxminratio); + RepartitionHeuristicFact->SetParameterList(paramListRepFact); + } + RepartitionHeuristicFact->SetFactory("A", AcFact); + + // create "Partition" + Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); + isoInterface->SetFactory("A", AcFact); + isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); + isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); + + // create "Partition" by unamalgamtion + Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); + repInterface->SetFactory("A", AcFact); + repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); + repInterface->SetFactory("AmalgamatedPartition", isoInterface); + // repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? 
+ + // Repartitioning (creates "Importer" from "Partition") + RepartitionFact = Teuchos::rcp(new RepartitionFactory()); + RepartitionFact->SetFactory("A", AcFact); + RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); + RepartitionFact->SetFactory("Partition", repInterface); + + // Reordering of the transfer operators + RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); + RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); + RebalancedPFact->SetFactory("P", PFact); + RebalancedPFact->SetFactory("Nullspace", PtentFact); + RebalancedPFact->SetFactory("Importer", RepartitionFact); + + RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); + RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); + RebalancedRFact->SetFactory("R", RFact); + RebalancedRFact->SetFactory("Importer", RepartitionFact); + + // Compute Ac from rebalanced P and R + RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); + RebalancedAFact->SetFactory("A", AcFact); + } +#else // #ifdef HAVE_MUELU_ISORROPIA + // Get rid of [-Wunused] warnings + //(void) + // + // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. +#endif -#if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(! ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); -#else - // If no validator available: issue a warning and set parameter value to false in the output list - this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." 
<< std::endl; - paramList.set("ML validate parameter list", false); + // + // Nullspace factory + // -#endif // HAVE_MUELU_ML - } // if(validate) - } // scope + // Set fine level nullspace + // extract pre-computed nullspace from ML parameter list + // store it in nullspace_ and nullspaceDim_ + if (nullspaceType != "default vectors") { + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); + nullspaceDim_ = nullspaceDim; + nullspace_ = nullspaceVec; + } - // Matrix option - blksize_ = nDofsPerNode; + Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); + nspFact->SetFactory("Nullspace", PtentFact); - // Translate verbosity parameter + // Stash coordinates + xcoord_ = xcoord; + ycoord_ = ycoord; + zcoord_ = zcoord; - // Translate verbosity parameter - MsgType eVerbLevel = None; - if (verbosityLevel == 0) eVerbLevel = None; - if (verbosityLevel >= 1) eVerbLevel = Low; - if (verbosityLevel >= 5) eVerbLevel = Medium; - if (verbosityLevel >= 10) eVerbLevel = High; - if (verbosityLevel >= 11) eVerbLevel = Extreme; - if (verbosityLevel >= 42) eVerbLevel = Test; - if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; - this->verbosity_ = eVerbLevel; + // + // Hierarchy + FactoryManager + // + // Hierarchy options + this->numDesiredLevel_ = maxLevels; + this->maxCoarseSize_ = maxCoarseSize; - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::SetParameterList(): parameter 
\"aggregation: type\": only 'Uncoupled' aggregation is supported."); + // + // Coarse Smoother + // + ParameterList& coarseList = paramList.sublist("coarse: list"); + // check whether coarse solver is set properly. If not, set default coarse solver. + if (!coarseList.isParameter("smoother: type")) + coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide + RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - // Create MueLu factories - RCP dropFact; - if(useKokkosRefactor) - dropFact = rcp( new CoalesceDropFactory_kokkos() ); - else - dropFact = rcp( new CoalesceDropFactory() ); + // Smoothers Top Level Parameters - if (agg_use_aux) { - dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(std::string("distance laplacian"))); - dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(agg_aux_thresh)); - } + RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - // Uncoupled aggregation - RCP AggFact = Teuchos::null; - if(useKokkosRefactor) { - AggFact = rcp( new UncoupledAggregationFactory_kokkos() ); - } - else - AggFact = rcp( new UncoupledAggregationFactory() ); - - AggFact->SetFactory("Graph", dropFact); - AggFact->SetFactory("DofsPerNode", dropFact); - AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); - AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); - AggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(maxNbrAlreadySelected)); - AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); - - - if (verbosityLevel > 3) { - std::ostringstream oss; - oss << "========================= Aggregate option summary  =========================" << std::endl; - oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; - oss << "min # of root nbrs already aggregated : " << 
maxNbrAlreadySelected << std::endl; - oss << "aggregate ordering :                    natural" << std::endl; - oss << "=============================================================================" << std::endl; - this->GetOStream(Runtime1) << oss.str(); - } + // - RCP PFact; - RCP RFact; - RCP PtentFact; - if(useKokkosRefactor) - PtentFact = rcp( new TentativePFactory_kokkos() ); - else - PtentFact = rcp( new TentativePFactory() ); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp( new TransPFactory() ); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact; - if(useKokkosRefactor) - SaPFact = rcp( new SaPFactory_kokkos() ); - else - SaPFact = rcp( new SaPFactory() ); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp( new TransPFactory() ); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp( new PgPFactory() ); - RFact = rcp( new GenericRFactory() ); - } - - RCP AcFact = rcp( new RAPFactory() ); - AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); - for (size_t i = 0; iAddTransferFactory(TransferFacts_[i]); - } + // Prepare factory managers + // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList + for (int levelID = 0; levelID < maxLevels; levelID++) { // - // introduce rebalancing + // Level FactoryManager // -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - Teuchos::RCP RebalancedPFact = Teuchos::null; - Teuchos::RCP RebalancedRFact = Teuchos::null; - Teuchos::RCP RepartitionFact = Teuchos::null; - Teuchos::RCP RebalancedAFact = Teuchos::null; - MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); - if (bDoRepartition == 1) { - // The Factory Manager will be configured 
to return the rebalanced versions of P, R, A by default. - // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. - RFact->SetFactory("P", PFact); - // - AcFact->SetFactory("P", PFact); - AcFact->SetFactory("R", RFact); - - // define rebalancing factory for coarse matrix - Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); - rebAmalgFact->SetFactory("A", AcFact); - - MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); - MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); - - // Repartitioning heuristic - RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); - { - Teuchos::ParameterList paramListRepFact; - paramListRepFact.set("repartition: min rows per proc", minperproc); - paramListRepFact.set("repartition: max imbalance", maxminratio); - RepartitionHeuristicFact->SetParameterList(paramListRepFact); - } - RepartitionHeuristicFact->SetFactory("A", AcFact); - - // create "Partition" - Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); - isoInterface->SetFactory("A", AcFact); - isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); - - // create "Partition" by unamalgamtion - Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); - repInterface->SetFactory("A", AcFact); - repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - repInterface->SetFactory("AmalgamatedPartition", isoInterface); - //repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? 
- - // Repartitioning (creates "Importer" from "Partition") - RepartitionFact = Teuchos::rcp(new RepartitionFactory()); - RepartitionFact->SetFactory("A", AcFact); - RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); - RepartitionFact->SetFactory("Partition", repInterface); - - // Reordering of the transfer operators - RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); - RebalancedPFact->SetFactory("P", PFact); - RebalancedPFact->SetFactory("Nullspace", PtentFact); - RebalancedPFact->SetFactory("Importer", RepartitionFact); - - RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); - RebalancedRFact->SetFactory("R", RFact); - RebalancedRFact->SetFactory("Importer", RepartitionFact); - - // Compute Ac from rebalanced P and R - RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); - RebalancedAFact->SetFactory("A", AcFact); - } -#else // #ifdef HAVE_MUELU_ISORROPIA - // Get rid of [-Wunused] warnings - //(void) - // - // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. -#endif + RCP manager = rcp(new FactoryManager()); + if (setKokkosRefactor) + manager->SetKokkosRefactor(useKokkosRefactor); // - // Nullspace factory + // Smoothers // - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). 
error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); - nspFact->SetFactory("Nullspace", PtentFact); - - - // Stash coordinates - xcoord_ = xcoord; - ycoord_ = ycoord; - zcoord_ = zcoord; - + { + // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. + // TODO: unit-test this part alone + ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy + MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ + // std::cout << std::endl << "Merged List for level " << levelID << std::endl; + // std::cout << levelSmootherParam << std::endl; - // - // Hierarchy + FactoryManager - // + RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. - // Hierarchy options - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; + manager->SetFactory("Smoother", smootherFact); + } // - // Coarse Smoother + // Misc // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // check whether coarse solver is set properly. If not, set default coarse solver. 
- if (!coarseList.isParameter("smoother: type")) - coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide - RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - - // - - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID=0; levelID < maxLevels; levelID++) { - - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - if (setKokkosRefactor) - manager->SetKokkosRefactor(useKokkosRefactor); - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. - // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. 
- - manager->SetFactory("Smoother", smootherFact); - } - - // - // Misc - // - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("Ptent", PtentFact); + manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop + manager->SetFactory("Graph", dropFact); + manager->SetFactory("Aggregates", AggFact); + manager->SetFactory("DofsPerNode", dropFact); + manager->SetFactory("Ptent", PtentFact); #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) if (bDoRepartition == 1) { manager->SetFactory("A", RebalancedAFact); manager->SetFactory("P", RebalancedPFact); manager->SetFactory("R", RebalancedRFact); - manager->SetFactory("Nullspace", RebalancedPFact); - manager->SetFactory("Importer", RepartitionFact); + manager->SetFactory("Nullspace", RebalancedPFact); + manager->SetFactory("Importer", RepartitionFact); } else { -#endif // #ifdef HAVE_MUELU_ISORROPIA - manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels - manager->SetFactory("A", AcFact); // same RAP factory for all levels - manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels - manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels +#endif // #ifdef HAVE_MUELU_ISORROPIA + manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels + manager->SetFactory("A", AcFact); // same RAP factory for all levels + manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels + manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) } #endif - this->AddFactoryManager(levelID, 1, manager); - } // for (level loop) 
- - } - - template - void MLParameterListInterpreter::SetupHierarchy(Hierarchy & H) const { - // if nullspace_ has already been extracted from ML parameter list - // make nullspace available for MueLu - if (nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - RCP Op = fineLevel->Get >("A"); - RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for ( size_t i=0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i*myLength + j]; - } + this->AddFactoryManager(levelID, 1, manager); + } // for (level loop) +} + +template +void MLParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { + // if nullspace_ has already been extracted from ML parameter list + // make nullspace available for MueLu + if (nullspace_ != NULL) { + RCP fineLevel = H.GetLevel(0); + RCP Op = fineLevel->Get >("A"); + RCP A = rcp_dynamic_cast(Op); + if (!A.is_null()) { + const RCP rowMap = fineLevel->Get >("A")->getRowMap(); + RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); + + for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { + Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); + const size_t myLength = nullspace->getLocalLength(); + + for (size_t j = 0; j < myLength; j++) { + nullspacei[j] = nullspace_[i * myLength + j]; } - - fineLevel->Set("Nullspace", nullspace); } + + fineLevel->Set("Nullspace", nullspace); } + } - // Do the same for coordinates - size_t num_coords = 0; - double * coordPTR[3]; - if (xcoord_) { - coordPTR[0] = xcoord_; + // Do the same for coordinates + size_t num_coords = 0; + double* coordPTR[3]; + if (xcoord_) { + coordPTR[0] = xcoord_; + num_coords++; + if (ycoord_) { + coordPTR[1] = ycoord_; num_coords++; - 
if (ycoord_) { - coordPTR[1] = ycoord_; + if (zcoord_) { + coordPTR[2] = zcoord_; num_coords++; - if (zcoord_) { - coordPTR[2] = zcoord_; - num_coords++; - } } } - if (num_coords){ - Teuchos::RCP fineLevel = H.GetLevel(0); - Teuchos::RCP Op = fineLevel->Get >("A"); - Teuchos::RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const Teuchos::RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); - - for ( size_t i=0; i < num_coords; i++) { - Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); - const size_t myLength = coordinates->getLocalLength(); - for (size_t j = 0; j < myLength; j++) { - coordsi[j] = coordPTR[i][j]; - } + } + if (num_coords) { + Teuchos::RCP fineLevel = H.GetLevel(0); + Teuchos::RCP Op = fineLevel->Get >("A"); + Teuchos::RCP A = rcp_dynamic_cast(Op); + if (!A.is_null()) { + const Teuchos::RCP rowMap = fineLevel->Get >("A")->getRowMap(); + Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); + + for (size_t i = 0; i < num_coords; i++) { + Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); + const size_t myLength = coordinates->getLocalLength(); + for (size_t j = 0; j < myLength; j++) { + coordsi[j] = coordPTR[i][j]; } - fineLevel->Set("Coordinates",coordinates); } + fineLevel->Set("Coordinates", coordinates); } - - HierarchyManager::SetupHierarchy(H); } - // TODO: code factorization with MueLu_ParameterListInterpreter. - template - RCP > - MLParameterListInterpreter:: - GetSmootherFactory (const Teuchos::ParameterList & paramList, - const RCP & AFact) - { - typedef Teuchos::ScalarTraits STS; - SC one = STS::one(); - - std::string type = "symmetric Gauss-Seidel"; // default - - // - // Get 'type' - // - -// //TODO: fix defaults!! 
- -// // Default coarse grid smoother -// std::string type; -// if ("smoother" == "coarse") { -// #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) -// type = ""; // use default defined by AmesosSmoother or Amesos2Smoother -// #else -// type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) -// #endif -// } else { -// // TODO: default smoother? -// type = ""; -// } - - - if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); - TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl << paramList); - - // - // Create the smoother prototype - // - - RCP smooProto; - std::string ifpackType; - Teuchos::ParameterList smootherParamList; - - if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { - if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME - - ifpackType = "RELAXATION"; - smootherParamList.set("relaxation: type", type); - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); - - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); - - } else if (type == "Chebyshev" || type == "MLS") { - - ifpackType = "CHEBYSHEV"; - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); - if (paramList.isParameter("smoother: MLS alpha")) { - MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } else { - 
MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } - - - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); + HierarchyManager::SetupHierarchy(H); +} + +// TODO: code factorization with MueLu_ParameterListInterpreter. +template +RCP > +MLParameterListInterpreter:: + GetSmootherFactory(const Teuchos::ParameterList& paramList, + const RCP& AFact) { + typedef Teuchos::ScalarTraits STS; + SC one = STS::one(); + + std::string type = "symmetric Gauss-Seidel"; // default + + // + // Get 'type' + // + + // //TODO: fix defaults!! + + // // Default coarse grid smoother + // std::string type; + // if ("smoother" == "coarse") { + // #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) + // type = ""; // use default defined by AmesosSmoother or Amesos2Smoother + // #else + // type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) + // #endif + // } else { + // // TODO: default smoother? 
+ // type = ""; + // } + + if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); + TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl + << paramList); + + // + // Create the smoother prototype + // + + RCP smooProto; + std::string ifpackType; + Teuchos::ParameterList smootherParamList; + + if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { + if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME + + ifpackType = "RELAXATION"; + smootherParamList.set("relaxation: type", type); + + MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); + + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); + + } else if (type == "Chebyshev" || type == "MLS") { + ifpackType = "CHEBYSHEV"; + + MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); + if (paramList.isParameter("smoother: MLS alpha")) { + MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); + } else { + MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); + } - } else if (type == "Hiptmair") { - ifpackType = "HIPTMAIR"; - std::string subSmootherType = "Chebyshev"; - if (paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == 
"symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); - - smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); - smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); - - auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); - auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); - - if (subSmootherType == "Chebyshev") { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); - - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); - } else { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); - - MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); - } + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); + + } else if (type == "Hiptmair") { + ifpackType = "HIPTMAIR"; + std::string subSmootherType = "Chebyshev"; + if (paramList.isParameter("subsmoother: type")) + subSmootherType = paramList.get("subsmoother: type"); + std::string subSmootherIfpackType; + if (subSmootherType == "Chebyshev") + subSmootherIfpackType = "CHEBYSHEV"; + else if (subSmootherType == "Jacobi" || subSmootherType == 
"Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { + if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME + subSmootherIfpackType = "RELAXATION"; + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); + + smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); + smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); + + auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); + auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); + + if (subSmootherType == "Chebyshev") { + MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); + MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); + + MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); + MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); + } else { + MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); + } - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); - } else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 + } else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 #if 
defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_IFPACK) - ifpackType = paramList.get("smoother: ifpack type"); - - if (ifpackType == "ILU") { - // TODO fix this (type mismatch double vs. int) - //MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); - if (paramList.isParameter("smoother: ifpack level-of-fill")) - smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); - else smootherParamList.set("fact: level-of-fill", as(0)); - - MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - - // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack - smooProto = - MueLu::GetIfpackSmoother (ifpackType, - smootherParamList, - paramList.get ("smoother: ifpack overlap")); - smooProto->SetFactory("A", AFact); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. Only ILU is supported."); - } -#else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); -#endif + ifpackType = paramList.get("smoother: ifpack type"); - } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ - std::string solverType = type.substr(strlen("Amesos")+1); /* ("Amesos-KLU" -> "KLU") */ - - // Validator: following upper/lower case is what is allowed by ML - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? 
*/ - for (int i=0; i < validatorSize; i++) { if (validator[i] == solverType) valid = true; } - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); + if (ifpackType == "ILU") { + // TODO fix this (type mismatch double vs. int) + // MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); + if (paramList.isParameter("smoother: ifpack level-of-fill")) + smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); + else + smootherParamList.set("fact: level-of-fill", as(0)); - // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment - std::transform(solverType.begin()+1, solverType.end(), solverType.begin()+1, ::tolower); + MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - smooProto = Teuchos::rcp( new DirectSolver(solverType, Teuchos::ParameterList()) ); + // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack + smooProto = + MueLu::GetIfpackSmoother(ifpackType, + smootherParamList, + paramList.get("smoother: ifpack overlap")); smooProto->SetFactory("A", AFact); - } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. Only ILU is supported."); + } +#else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); +#endif - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" << type << "' not supported by MueLu."); + } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ + std::string solverType = type.substr(strlen("Amesos") + 1); /* ("Amesos-KLU" -> "KLU") */ + // Validator: following upper/lower case is what is allowed by ML + bool valid = false; + const int validatorSize = 5; + std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? */ + for (int i = 0; i < validatorSize; i++) { + if (validator[i] == solverType) valid = true; } - TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. fatal error."); - - // - // Create the smoother factory - // + TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); - RCP SmooFact = rcp( new SmootherFactory() ); + // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment + std::transform(solverType.begin() + 1, solverType.end(), solverType.begin() + 1, ::tolower); - // Set parameters of the smoother factory - MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); - if (preOrPost == "both") { - SmooFact->SetSmootherPrototypes(smooProto, smooProto); - } else if (preOrPost == "pre") { - SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); - } else if (preOrPost == "post") { - SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); - } + smooProto = Teuchos::rcp(new DirectSolver(solverType, Teuchos::ParameterList())); + smooProto->SetFactory("A", AFact); - return SmooFact; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" << type << "' not supported by MueLu."); } - - template - void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); + TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. fatal error."); + + // + // Create the smoother factory + // + + RCP SmooFact = rcp(new SmootherFactory()); + + // Set parameters of the smoother factory + MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); + if (preOrPost == "both") { + SmooFact->SetSmootherPrototypes(smooProto, smooProto); + } else if (preOrPost == "pre") { + SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); + } else if (preOrPost == "post") { + SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); } - template - size_t MLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); - } + return SmooFact; +} - template - void MLParameterListInterpreter::SetupOperator(Operator & Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." 
<< std::endl; +template +void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); + TransferFacts_.push_back(factory); +} - A.SetFixedBlockSize(blksize_); +template +size_t MLParameterListInterpreter::NumTransferFactories() const { + return TransferFacts_.size(); +} + +template +void MLParameterListInterpreter::SetupOperator(Operator& Op) const { + try { + Matrix& A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." 
<< std::endl; + + A.SetFixedBlockSize(blksize_); #ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); -#endif // HAVE_MUELU_DEBUG + MatrixUtils::checkLocalRowMapMatchesColMap(A); +#endif // HAVE_MUELU_DEBUG - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } + } catch (std::bad_cast&) { + this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; } +} -} // namespace MueLu - +} // namespace MueLu #define MUELU_MLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP */ -//TODO: see if it can be factorized with ML interpreter (ex: generation of Ifpack param list) +// TODO: see if it can be factorized with ML interpreter (ex: generation of Ifpack param list) diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp index fe96d582432b..6d646b407a4d 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp @@ -51,44 +51,44 @@ namespace MueLu { - size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t) { - // degenerate cases - if (len_s == 0) return len_t; - if (len_t == 0) return len_s; - if (!strncmp(s, t, std::min(len_s, len_t))) return 0; +size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t) { + // degenerate cases + if (len_s == 0) return len_t; + if (len_t == 0) return len_s; + if (!strncmp(s, t, std::min(len_s, len_t))) return 0; - // create two work vectors of integer distances - size_t len = len_t + 1; - std::vector v0(len); - std::vector v1(len); + // create two work vectors of integer distances + size_t len = len_t + 1; + std::vector v0(len); + std::vector v1(len); - // initialize v0 (the previous row of distances) - // this row is A[0][i]: edit 
distance for an empty s - // the distance is just the number of characters to delete from t - for (size_t i = 0; i < len; i++) - v0[i] = i; + // initialize v0 (the previous row of distances) + // this row is A[0][i]: edit distance for an empty s + // the distance is just the number of characters to delete from t + for (size_t i = 0; i < len; i++) + v0[i] = i; - for (size_t i = 0; i < len_s; i++) { - // calculate v1 (current row distances) from the previous row v0 + for (size_t i = 0; i < len_s; i++) { + // calculate v1 (current row distances) from the previous row v0 - // first element of v1 is A[i+1][0] - // edit distance is delete (i+1) chars from s to match empty t - v1[0] = i + 1; + // first element of v1 is A[i+1][0] + // edit distance is delete (i+1) chars from s to match empty t + v1[0] = i + 1; - // use formula to fill in the rest of the row - for (size_t j = 0; j < len_t; j++) { - size_t cost = (s[i] == t[j]) ? 0 : 1; - v1[j+1] = std::min(v1[j] + 1, - std::min(v0[j + 1] + 1, - v0[j] + cost)); - } - - // copy v1 (current row) to v0 (previous row) for next iteration - for (size_t j = 0; j < len; j++) - v0[j] = v1[j]; + // use formula to fill in the rest of the row + for (size_t j = 0; j < len_t; j++) { + size_t cost = (s[i] == t[j]) ? 
0 : 1; + v1[j + 1] = std::min(v1[j] + 1, + std::min(v0[j + 1] + 1, + v0[j] + cost)); } - return v1[len_t]; + // copy v1 (current row) to v0 (previous row) for next iteration + for (size_t j = 0; j < len; j++) + v0[j] = v1[j]; } + return v1[len_t]; } + +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp index 395dc4231ffc..d4554d8016db 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp @@ -99,7 +99,6 @@ #include "MueLu_SingleLevelMatlabFactory_fwd.hpp" #endif - #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" @@ -113,184 +112,180 @@ namespace MueLu { - template - class ParameterListInterpreter : - public HierarchyManager { +template +class ParameterListInterpreter : public HierarchyManager { #undef MUELU_PARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::pair keep_pair; + typedef std::pair keep_pair; - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - protected: - /*! @brief Empty constructor - * - * Constructor for derived classes - */ - ParameterListInterpreter() { - factFact_ = Teuchos::null; - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - } + protected: + /*! @brief Empty constructor + * + * Constructor for derived classes + */ + ParameterListInterpreter() { + factFact_ = Teuchos::null; + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + } - public: - /*! @brief Constructor that accepts a user-provided ParameterList. + public: + /*! @brief Constructor that accepts a user-provided ParameterList. 
- Constructor for parameter list interpreter which directly interprets Teuchos::ParameterLists + Constructor for parameter list interpreter which directly interprets Teuchos::ParameterLists - @details The parameter list can be either in the easy parameter list format or in the factory driven parameter list format. + @details The parameter list can be either in the easy parameter list format or in the factory driven parameter list format. - @param[in] paramList (Teuchos::ParameterList): ParameterList containing the MueLu parameters - @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - @param[in] factFact (RCP): Optional parameter allowing to define user-specific factory interpreters for user-specific extensions of the XML interface. (default: Teuchos::null) - @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. (default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) + @param[in] paramList (Teuchos::ParameterList): ParameterList containing the MueLu parameters + @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) + @param[in] factFact (RCP): Optional parameter allowing to define user-specific factory interpreters for user-specific extensions of the XML interface. (default: Teuchos::null) + @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. 
(default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) - */ - ParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); + */ + ParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); - /*! @brief Constructor that reads parameters from an XML file. + /*! @brief Constructor that reads parameters from an XML file. - XML options are converted to ParameterList entries by Teuchos. + XML options are converted to ParameterList entries by Teuchos. - @param[in] xmlFileName (std::string): XML file to read - @param[in] comm (Teuchos::Comm): Teuchos communicator - @param[in] factFact (RCP): Optional parameter allowing to define user-specific factory interpreters for user-specific extensions of the XML interface. (default: Teuchos::null) - @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. (default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) + @param[in] xmlFileName (std::string): XML file to read + @param[in] comm (Teuchos::Comm): Teuchos communicator + @param[in] factFact (RCP): Optional parameter allowing to define user-specific factory interpreters for user-specific extensions of the XML interface. (default: Teuchos::null) + @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. 
(default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) - */ - ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); + */ + ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); - //! Destructor. - virtual ~ParameterListInterpreter() { } + //! Destructor. + virtual ~ParameterListInterpreter() {} - //@} + //@} - /*! @brief Set parameter list for Parameter list interpreter. + /*! @brief Set parameter list for Parameter list interpreter. - The routine checks whether it is a parameter list in the easy parameter format or the more advanced factory-based parameter format and calls the corresponding interpreter routine. + The routine checks whether it is a parameter list in the easy parameter format or the more advanced factory-based parameter format and calls the corresponding interpreter routine. - When finished, the parameter list is set that will used by the hierarchy build phase. + When finished, the parameter list is set that will used by the hierarchy build phase. - This method includes validation and some pre-parsing of the list for: - - verbosity level - - data to export - - cycle type - - max coarse size - - max levels - - number of equations + This method includes validation and some pre-parsing of the list for: + - verbosity level + - data to export + - cycle type + - max coarse size + - max levels + - number of equations - @param[in] paramList: ParameterList containing the MueLu parameters. - */ - void SetParameterList(const Teuchos::ParameterList& paramList); + @param[in] paramList: ParameterList containing the MueLu parameters. + */ + void SetParameterList(const Teuchos::ParameterList& paramList); - //! Call the SetupHierarchy routine from the HiearchyManager object. 
- void SetupHierarchy(Hierarchy& H) const; + //! Call the SetupHierarchy routine from the HiearchyManager object. + void SetupHierarchy(Hierarchy& H) const; - private: - //! Setup Operator object - virtual void SetupOperator(Operator& A) const; + private: + //! Setup Operator object + virtual void SetupOperator(Operator& A) const; - int blockSize_; ///< block size of matrix (fixed block size) - CycleType Cycle_; ///< multigrid cycle type (V-cycle or W-cycle) - int WCycleStartLevel_; ///< in case of W-cycle, level on which cycle should start - double scalingFactor_; ///< prolongator scaling factor - GlobalOrdinal dofOffset_; ///< global offset variable describing offset of DOFs in operator + int blockSize_; ///< block size of matrix (fixed block size) + CycleType Cycle_; ///< multigrid cycle type (V-cycle or W-cycle) + int WCycleStartLevel_; ///< in case of W-cycle, level on which cycle should start + double scalingFactor_; ///< prolongator scaling factor + GlobalOrdinal dofOffset_; ///< global offset variable describing offset of DOFs in operator - //! Easy interpreter stuff - //@{ - // These three variables are only needed to print out proper [default] - bool changedPRrebalance_; - bool changedPRViaCopyrebalance_; - bool changedImplicitTranspose_; + //! 
Easy interpreter stuff + //@{ + // These three variables are only needed to print out proper [default] + bool changedPRrebalance_; + bool changedPRViaCopyrebalance_; + bool changedImplicitTranspose_; - void SetEasyParameterList(const Teuchos::ParameterList& paramList); - void Validate(const Teuchos::ParameterList& paramList) const; + void SetEasyParameterList(const Teuchos::ParameterList& paramList); + void Validate(const Teuchos::ParameterList& paramList) const; - void UpdateFactoryManager(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; + void UpdateFactoryManager(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; - // "Generic components" for UpdateFactoryManager - void UpdateFactoryManager_Smoothers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_CoarseSolvers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Aggregation_TentativeP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Restriction(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_RAP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Coordinates(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + // "Generic components" for UpdateFactoryManager + void 
UpdateFactoryManager_Smoothers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_CoarseSolvers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Repartition(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const; - void UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Nullspace(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const; - void UpdateFactoryManager_BlockNumber(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, - FactoryManager& manager,int levelID, std::vector& keeps) const; - void UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, - FactoryManager& manager,int levelID, std::vector& keeps) const; - - // Algorithm-specific components for UpdateFactoryManager - void UpdateFactoryManager_SemiCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_PCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void UpdateFactoryManager_Aggregation_TentativeP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void 
UpdateFactoryManager_Restriction(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_RAP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Coordinates(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Repartition(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps, RCP& nullSpaceFactory) const; + void UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Nullspace(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps, RCP& nullSpaceFactory) const; + void UpdateFactoryManager_BlockNumber(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const; + void UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const; + + // Algorithm-specific components for UpdateFactoryManager + void UpdateFactoryManager_SemiCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_PCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void 
UpdateFactoryManager_SA(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Reitzinger(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_SA(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void UpdateFactoryManager_Emin(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Reitzinger(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Emin(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void UpdateFactoryManager_PG(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Replicate(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Combine(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Matlab(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_PG(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Replicate(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, 
FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Combine(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Matlab(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - - - - bool useCoordinates_; - bool useBlockNumber_; - bool useKokkos_; - //@} + bool useCoordinates_; + bool useBlockNumber_; + bool useKokkos_; + //@} - //! Factory interpreter stuff - // TODO: - // - parameter list validator - // - SetParameterList - // - Set/Get directly Level manager - // - build per level - // - comments/docs - // - use FactoryManager instead of FactoryMap - //@{ - void SetFactoryParameterList(const Teuchos::ParameterList& paramList); + //! Factory interpreter stuff + // TODO: + // - parameter list validator + // - SetParameterList + // - Set/Get directly Level manager + // - build per level + // - comments/docs + // - use FactoryManager instead of FactoryMap + //@{ + void SetFactoryParameterList(const Teuchos::ParameterList& paramList); - typedef std::map > FactoryMap; //TODO: remove this line - typedef std::map > FactoryManagerMap; + typedef std::map > FactoryMap; // TODO: remove this line + typedef std::map > FactoryManagerMap; - void BuildFactoryMap(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const; + void BuildFactoryMap(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const; - //! Internal factory for factories - Teuchos::RCP factFact_; + //! Internal factory for factories + Teuchos::RCP factFact_; - //! FacadeClass factory - Teuchos::RCP > facadeFact_; + //! 
FacadeClass factory + Teuchos::RCP > facadeFact_; - //@} - }; + //@} +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_PARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_PARAMETERLISTINTERPRETER_DECL_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp index d27cfdca878c..48af3ae2fda1 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp @@ -129,1838 +129,1789 @@ namespace MueLu { - template - ParameterListInterpreter::ParameterListInterpreter(ParameterList& paramList, Teuchos::RCP > comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) : factFact_(factFact) { - RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (ParameterList)")))); - if(facadeFact == Teuchos::null) - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - else - facadeFact_ = facadeFact; - - if (paramList.isParameter("xml parameter file")) { - std::string filename = paramList.get("xml parameter file", ""); - if (filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - - ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); - SetParameterList(paramList2); - - } else { - SetParameterList(paramList); - } +template +ParameterListInterpreter::ParameterListInterpreter(ParameterList& paramList, Teuchos::RCP > comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) + : factFact_(factFact) { + RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (ParameterList)")))); + if (facadeFact == Teuchos::null) + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + else + facadeFact_ = facadeFact; + + if 
(paramList.isParameter("xml parameter file")) { + std::string filename = paramList.get("xml parameter file", ""); + if (filename.length() != 0) { + TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); + + ParameterList paramList2 = paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); + SetParameterList(paramList2); } else { SetParameterList(paramList); } - } - template - ParameterListInterpreter::ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) : factFact_(factFact) { - RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (XML)")))); - if(facadeFact == Teuchos::null) - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - else - facadeFact_ = facadeFact; - - ParameterList paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), comm); + } else { SetParameterList(paramList); } - - template - void ParameterListInterpreter::SetParameterList(const ParameterList& paramList) { - Cycle_ = Hierarchy::GetDefaultCycle(); - WCycleStartLevel_ = Hierarchy::GetDefaultCycleStartLevel(); - scalingFactor_= Teuchos::ScalarTraits::one(); - blockSize_ = 1; - dofOffset_ = 0; - - if (paramList.isSublist("Hierarchy")) { - SetFactoryParameterList(paramList); - - } else if (paramList.isParameter("MueLu preconditioner") == true) { - this->GetOStream(Runtime0) << "Use facade class: " << paramList.get("MueLu preconditioner") << std::endl; - Teuchos::RCP pp = facadeFact_->SetParameterList(paramList); - SetFactoryParameterList(*pp); - - } else { - // The validator doesn't work correctly for non-serializable data (Hint: template parameters), so strip it out - ParameterList serialList, nonSerialList; - - ExtractNonSerializableData(paramList, serialList, nonSerialList); - Validate(serialList); - 
SetEasyParameterList(paramList); - } +} + +template +ParameterListInterpreter::ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) + : factFact_(factFact) { + RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (XML)")))); + if (facadeFact == Teuchos::null) + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + else + facadeFact_ = facadeFact; + + ParameterList paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), comm); + SetParameterList(paramList); +} + +template +void ParameterListInterpreter::SetParameterList(const ParameterList& paramList) { + Cycle_ = Hierarchy::GetDefaultCycle(); + WCycleStartLevel_ = Hierarchy::GetDefaultCycleStartLevel(); + scalingFactor_ = Teuchos::ScalarTraits::one(); + blockSize_ = 1; + dofOffset_ = 0; + + if (paramList.isSublist("Hierarchy")) { + SetFactoryParameterList(paramList); + + } else if (paramList.isParameter("MueLu preconditioner") == true) { + this->GetOStream(Runtime0) << "Use facade class: " << paramList.get("MueLu preconditioner") << std::endl; + Teuchos::RCP pp = facadeFact_->SetParameterList(paramList); + SetFactoryParameterList(*pp); + + } else { + // The validator doesn't work correctly for non-serializable data (Hint: template parameters), so strip it out + ParameterList serialList, nonSerialList; + + ExtractNonSerializableData(paramList, serialList, nonSerialList); + Validate(serialList); + SetEasyParameterList(paramList); } +} - // ===================================================================================================== - // ====================================== EASY interpreter ============================================= - // ===================================================================================================== - //! 
Helper functions to compare two paramter lists - static inline bool areSame(const ParameterList& list1, const ParameterList& list2); +// ===================================================================================================== +// ====================================== EASY interpreter ============================================= +// ===================================================================================================== +//! Helper functions to compare two paramter lists +static inline bool areSame(const ParameterList& list1, const ParameterList& list2); - // Get value from one of the lists, or set it to default - // Use case: check for a parameter value in a level-specific sublist, then in a root level list; - // if it is absent from both, set it to default +// Get value from one of the lists, or set it to default +// Use case: check for a parameter value in a level-specific sublist, then in a root level list; +// if it is absent from both, set it to default #define MUELU_SET_VAR_2LIST(paramList, defaultList, paramName, paramType, varName) \ - paramType varName; \ - if (paramList.isParameter(paramName)) varName = paramList.get(paramName); \ - else if (defaultList.isParameter(paramName)) varName = defaultList.get(paramName); \ - else varName = MasterList::getDefault(paramName); + paramType varName; \ + if (paramList.isParameter(paramName)) \ + varName = paramList.get(paramName); \ + else if (defaultList.isParameter(paramName)) \ + varName = defaultList.get(paramName); \ + else \ + varName = MasterList::getDefault(paramName); #define MUELU_TEST_AND_SET_VAR(paramList, paramName, paramType, varName) \ (paramList.isParameter(paramName) ? 
varName = paramList.get(paramName), true : false) - // Set parameter in a list if it is present in any of two lists - // User case: set factory specific parameter, first checking for a level-specific value, then cheking root level value -#define MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, paramName, paramType, listWrite) \ - try { \ - if (paramList .isParameter(paramName)) listWrite.set(paramName, paramList .get(paramName)); \ - else if (defaultList.isParameter(paramName)) listWrite.set(paramName, defaultList.get(paramName)); \ - } \ - catch(Teuchos::Exceptions::InvalidParameterType&) { \ - TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, \ +// Set parameter in a list if it is present in any of two lists +// User case: set factory specific parameter, first checking for a level-specific value, then cheking root level value +#define MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, paramName, paramType, listWrite) \ + try { \ + if (paramList.isParameter(paramName)) \ + listWrite.set(paramName, paramList.get(paramName)); \ + else if (defaultList.isParameter(paramName)) \ + listWrite.set(paramName, defaultList.get(paramName)); \ + } catch (Teuchos::Exceptions::InvalidParameterType&) { \ + TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, \ "Error: parameter \"" << paramName << "\" must be of type " << Teuchos::TypeNameTraits::name()); \ - } \ + } #define MUELU_TEST_PARAM_2LIST(paramList, defaultList, paramName, paramType, cmpValue) \ - (cmpValue == ( \ - paramList.isParameter(paramName) ? paramList .get(paramName) : ( \ - defaultList.isParameter(paramName) ? defaultList.get(paramName) : \ - MasterList::getDefault(paramName) ) ) ) + (cmpValue == (paramList.isParameter(paramName) ? paramList.get(paramName) : (defaultList.isParameter(paramName) ? 
defaultList.get(paramName) : MasterList::getDefault(paramName)))) #define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ - RCP varName; \ - if (!useKokkos_) varName = rcp(new oldFactory()); \ - else varName = rcp(new newFactory()); + RCP varName; \ + if (!useKokkos_) \ + varName = rcp(new oldFactory()); \ + else \ + varName = rcp(new newFactory()); #define MUELU_KOKKOS_FACTORY_NO_DECL(varName, oldFactory, newFactory) \ - if (!useKokkos_) varName = rcp(new oldFactory()); \ - else varName = rcp(new newFactory()); - - template - void ParameterListInterpreter:: - SetEasyParameterList(const ParameterList& constParamList) { - ParameterList paramList; - - MUELU_SET_VAR_2LIST(constParamList, constParamList, "problem: type", std::string, problemType); - if (problemType != "unknown") { - paramList = *MasterList::GetProblemSpecificList(problemType); - paramList.setParameters(constParamList); - } else { - // Create a non const copy of the parameter list - // Working with a modifiable list is much much easier than with original one - paramList = constParamList; - } - - // Check for Kokkos - useKokkos_ = !Node::is_serial; - (void)MUELU_TEST_AND_SET_VAR(paramList, "use kokkos refactor", bool, useKokkos_); - - // Check for timer synchronization - MUELU_SET_VAR_2LIST(paramList, paramList, "synchronize factory timers", bool, syncTimers); - if (syncTimers) - Factory::EnableTimerSync(); - - // Translate cycle type parameter - if (paramList.isParameter("cycle type")) { - std::map cycleMap; - cycleMap["V"] = VCYCLE; - cycleMap["W"] = WCYCLE; - - auto cycleType = paramList.get("cycle type"); - TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, - "Invalid cycle type: \"" << cycleType << "\""); - Cycle_ = cycleMap[cycleType]; - } - - if (paramList.isParameter("W cycle start level")) { - WCycleStartLevel_ = paramList.get("W cycle start level"); - } - - if (paramList.isParameter("coarse grid correction scaling factor")) - scalingFactor_ = 
paramList.get("coarse grid correction scaling factor"); - - this->maxCoarseSize_ = paramList.get ("coarse: max size", MasterList::getDefault("coarse: max size")); - this->numDesiredLevel_ = paramList.get ("max levels", MasterList::getDefault("max levels")); - blockSize_ = paramList.get ("number of equations", MasterList::getDefault("number of equations")); - - - (void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, this->graphOutputLevel_); - - // Generic data saving (this saves the data on all levels) - if(paramList.isParameter("save data")) - this->dataToSave_ = Teuchos::getArrayFromStringParameter(paramList,"save data"); + if (!useKokkos_) \ + varName = rcp(new oldFactory()); \ + else \ + varName = rcp(new newFactory()); + +template +void ParameterListInterpreter:: + SetEasyParameterList(const ParameterList& constParamList) { + ParameterList paramList; + + MUELU_SET_VAR_2LIST(constParamList, constParamList, "problem: type", std::string, problemType); + if (problemType != "unknown") { + paramList = *MasterList::GetProblemSpecificList(problemType); + paramList.setParameters(constParamList); + } else { + // Create a non const copy of the parameter list + // Working with a modifiable list is much much easier than with original one + paramList = constParamList; + } - // Save level data - if (paramList.isSublist("export data")) { - ParameterList printList = paramList.sublist("export data"); + // Check for Kokkos + useKokkos_ = !Node::is_serial; + (void)MUELU_TEST_AND_SET_VAR(paramList, "use kokkos refactor", bool, useKokkos_); + + // Check for timer synchronization + MUELU_SET_VAR_2LIST(paramList, paramList, "synchronize factory timers", bool, syncTimers); + if (syncTimers) + Factory::EnableTimerSync(); + + // Translate cycle type parameter + if (paramList.isParameter("cycle type")) { + std::map cycleMap; + cycleMap["V"] = VCYCLE; + cycleMap["W"] = WCYCLE; + + auto cycleType = paramList.get("cycle type"); + 
TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, + "Invalid cycle type: \"" << cycleType << "\""); + Cycle_ = cycleMap[cycleType]; + } - // Vectors, aggregates and other things that need special handling - if (printList.isParameter("Nullspace")) - this->nullspaceToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Nullspace"); - if (printList.isParameter("Coordinates")) - this->coordinatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Coordinates"); - if (printList.isParameter("Aggregates")) - this->aggregatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Aggregates"); - if (printList.isParameter("pcoarsen: element to node map")) - this->elementToNodeMapsToPrint_ = Teuchos::getArrayFromStringParameter(printList, "pcoarsen: element to node map"); + if (paramList.isParameter("W cycle start level")) { + WCycleStartLevel_ = paramList.get("W cycle start level"); + } - // If we asked for an arbitrary matrix to be printed, we do that here - for(auto iter = printList.begin(); iter != printList.end(); iter++) { - const std::string & name = printList.name(iter); - // Ignore the special cases - if(name == "Nullspace" || name == "Coordinates" || name == "Aggregates" || name == "pcoarsen: element to node map") - continue; + if (paramList.isParameter("coarse grid correction scaling factor")) + scalingFactor_ = paramList.get("coarse grid correction scaling factor"); + + this->maxCoarseSize_ = paramList.get("coarse: max size", MasterList::getDefault("coarse: max size")); + this->numDesiredLevel_ = paramList.get("max levels", MasterList::getDefault("max levels")); + blockSize_ = paramList.get("number of equations", MasterList::getDefault("number of equations")); + + (void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, this->graphOutputLevel_); + + // Generic data saving (this saves the data on all levels) + if (paramList.isParameter("save data")) + this->dataToSave_ = 
Teuchos::getArrayFromStringParameter(paramList, "save data"); + + // Save level data + if (paramList.isSublist("export data")) { + ParameterList printList = paramList.sublist("export data"); + + // Vectors, aggregates and other things that need special handling + if (printList.isParameter("Nullspace")) + this->nullspaceToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Nullspace"); + if (printList.isParameter("Coordinates")) + this->coordinatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Coordinates"); + if (printList.isParameter("Aggregates")) + this->aggregatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Aggregates"); + if (printList.isParameter("pcoarsen: element to node map")) + this->elementToNodeMapsToPrint_ = Teuchos::getArrayFromStringParameter(printList, "pcoarsen: element to node map"); + + // If we asked for an arbitrary matrix to be printed, we do that here + for (auto iter = printList.begin(); iter != printList.end(); iter++) { + const std::string& name = printList.name(iter); + // Ignore the special cases + if (name == "Nullspace" || name == "Coordinates" || name == "Aggregates" || name == "pcoarsen: element to node map") + continue; - this->matricesToPrint_[name] = Teuchos::getArrayFromStringParameter(printList, name); - } + this->matricesToPrint_[name] = Teuchos::getArrayFromStringParameter(printList, name); } + } - // Set verbosity parameter - VerbLevel oldVerbLevel = VerboseObject::GetDefaultVerbLevel(); - { - MUELU_SET_VAR_2LIST(paramList, paramList, "verbosity", std::string, verbosityLevel); - this->verbosity_ = toVerbLevel(verbosityLevel); - VerboseObject::SetDefaultVerbLevel(this->verbosity_); - } + // Set verbosity parameter + VerbLevel oldVerbLevel = VerboseObject::GetDefaultVerbLevel(); + { + MUELU_SET_VAR_2LIST(paramList, paramList, "verbosity", std::string, verbosityLevel); + this->verbosity_ = toVerbLevel(verbosityLevel); + VerboseObject::SetDefaultVerbLevel(this->verbosity_); + } - 
MUELU_SET_VAR_2LIST(paramList, paramList, "output filename", std::string, outputFilename); - if (outputFilename != "") - VerboseObject::SetMueLuOFileStream(outputFilename); - - // Detect if we need to transfer coordinates to coarse levels. We do that iff - // - we use "distance laplacian" dropping on some level, or - // - we use a repartitioner on some level that needs coordinates - // - we use brick aggregation - // - we use Ifpack2 line partitioner - // This is not ideal, as we may have "repartition: enable" turned on by default - // and not present in the list, but it is better than nothing. - useCoordinates_ = false; - useBlockNumber_ = false; - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: type", std::string, "brick") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: export visualization data", bool, true)) { + MUELU_SET_VAR_2LIST(paramList, paramList, "output filename", std::string, outputFilename); + if (outputFilename != "") + VerboseObject::SetMueLuOFileStream(outputFilename); + + // Detect if we need to transfer coordinates to coarse levels. We do that iff + // - we use "distance laplacian" dropping on some level, or + // - we use a repartitioner on some level that needs coordinates + // - we use brick aggregation + // - we use Ifpack2 line partitioner + // This is not ideal, as we may have "repartition: enable" turned on by default + // and not present in the list, but it is better than nothing. 
+ useCoordinates_ = false; + useBlockNumber_ = false; + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: type", std::string, "brick") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: export visualization data", bool, true)) { + useCoordinates_ = true; + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { + useCoordinates_ = true; + useBlockNumber_ = true; + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal signed classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { + useBlockNumber_ = true; + } else if (paramList.isSublist("smoother: params")) { + const auto smooParamList = paramList.sublist("smoother: params"); + if (smooParamList.isParameter("partitioner: type") && + (smooParamList.get("partitioner: type") == "line")) { useCoordinates_ = true; - } else if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { - useCoordinates_ = true; - useBlockNumber_ = true; - } else if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, 
"block diagonal signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { - useBlockNumber_ = true; - } else if(paramList.isSublist("smoother: params")) { - const auto smooParamList = paramList.sublist("smoother: params"); - if(smooParamList.isParameter("partitioner: type") && - (smooParamList.get("partitioner: type") == "line")) { - useCoordinates_ = true; - } - } else { - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + } + } else { + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (paramList.isSublist(levelStr)) { + const ParameterList& levelList = paramList.sublist(levelStr); - if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: type", std::string, "brick") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: export visualization data", bool, true)) { - useCoordinates_ = true; - } - else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { - useCoordinates_ = true; - useBlockNumber_ = true; - } - else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || + if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || + MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: type", std::string, "brick") || + MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: export 
visualization data", bool, true)) { + useCoordinates_ = true; + } else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { + useCoordinates_ = true; + useBlockNumber_ = true; + } else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal signed classical") || MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { - useBlockNumber_ = true; - } + useBlockNumber_ = true; } } } + } - if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { - // We don't need coordinates if we're doing the in-place restriction - if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators", bool, true) && - MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators in place", bool, true)) { - // do nothing --- these don't need coordinates - } else if (!paramList.isSublist("repartition: params")) { - useCoordinates_ = true; - } else { - const ParameterList& repParams = paramList.sublist("repartition: params"); - if (repParams.isType("algorithm")) { - const std::string algo = repParams.get("algorithm"); - if (algo == "multijagged" || algo == "rcb") { - useCoordinates_ = true; - } - } else { + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { + // We don't need coordinates if we're doing the in-place restriction + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators", bool, true) && + MUELU_TEST_PARAM_2LIST(paramList, 
paramList, "repartition: use subcommunicators in place", bool, true)) { + // do nothing --- these don't need coordinates + } else if (!paramList.isSublist("repartition: params")) { + useCoordinates_ = true; + } else { + const ParameterList& repParams = paramList.sublist("repartition: params"); + if (repParams.isType("algorithm")) { + const std::string algo = repParams.get("algorithm"); + if (algo == "multijagged" || algo == "rcb") { useCoordinates_ = true; } + } else { + useCoordinates_ = true; } } - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + } + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (paramList.isSublist(levelStr)) { + const ParameterList& levelList = paramList.sublist(levelStr); - if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "repartition: enable", bool, true)) { - if (!levelList.isSublist("repartition: params")) { - useCoordinates_ = true; - break; - } else { - const ParameterList& repParams = levelList.sublist("repartition: params"); - if (repParams.isType("algorithm")) { - const std::string algo = repParams.get("algorithm"); - if (algo == "multijagged" || algo == "rcb"){ - useCoordinates_ = true; - break; - } - } else { + if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "repartition: enable", bool, true)) { + if (!levelList.isSublist("repartition: params")) { + useCoordinates_ = true; + break; + } else { + const ParameterList& repParams = levelList.sublist("repartition: params"); + if (repParams.isType("algorithm")) { + const std::string algo = repParams.get("algorithm"); + if (algo == "multijagged" || algo == "rcb") { useCoordinates_ = true; break; } + } else { + useCoordinates_ = true; + break; } } } } + } - // Detect if we do implicit P and R rebalance - changedPRrebalance_ = false; 
- changedPRViaCopyrebalance_ = false; - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { - changedPRrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: rebalance P and R", bool, this->doPRrebalance_); - changedPRViaCopyrebalance_ = MUELU_TEST_AND_SET_VAR(paramList,"repartition: explicit via new copy rebalance P and R", bool, this->doPRViaCopyrebalance_); - } - - // Detect if we use implicit transpose - changedImplicitTranspose_ = MUELU_TEST_AND_SET_VAR(paramList, "transpose: use implicit", bool, this->implicitTranspose_); + // Detect if we do implicit P and R rebalance + changedPRrebalance_ = false; + changedPRViaCopyrebalance_ = false; + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { + changedPRrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: rebalance P and R", bool, this->doPRrebalance_); + changedPRViaCopyrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: explicit via new copy rebalance P and R", bool, this->doPRViaCopyrebalance_); + } - // Detect if we use fuse prolongation and update - (void)MUELU_TEST_AND_SET_VAR(paramList, "fuse prolongation and update", bool, this->fuseProlongationAndUpdate_); + // Detect if we use implicit transpose + changedImplicitTranspose_ = MUELU_TEST_AND_SET_VAR(paramList, "transpose: use implicit", bool, this->implicitTranspose_); - // Detect if we suppress the dimension check of the user-given nullspace - (void)MUELU_TEST_AND_SET_VAR(paramList, "nullspace: suppress dimension check", bool, this->suppressNullspaceDimensionCheck_); + // Detect if we use fuse prolongation and update + (void)MUELU_TEST_AND_SET_VAR(paramList, "fuse prolongation and update", bool, this->fuseProlongationAndUpdate_); - if (paramList.isSublist("matvec params")) - this->matvecParams_ = Teuchos::parameterList(paramList.sublist("matvec params")); + // Detect if we suppress the dimension check of the user-given nullspace + 
(void)MUELU_TEST_AND_SET_VAR(paramList, "nullspace: suppress dimension check", bool, this->suppressNullspaceDimensionCheck_); - // Create default manager - // FIXME: should it be here, or higher up - RCP defaultManager = rcp(new FactoryManager()); - defaultManager->SetVerbLevel(this->verbosity_); - defaultManager->SetKokkosRefactor(useKokkos_); + if (paramList.isSublist("matvec params")) + this->matvecParams_ = Teuchos::parameterList(paramList.sublist("matvec params")); - // We will ignore keeps0 - std::vector keeps0; - UpdateFactoryManager(paramList, ParameterList(), *defaultManager, 0/*levelID*/, keeps0); + // Create default manager + // FIXME: should it be here, or higher up + RCP defaultManager = rcp(new FactoryManager()); + defaultManager->SetVerbLevel(this->verbosity_); + defaultManager->SetKokkosRefactor(useKokkos_); - // std::cout<<"*** Default Manager ***"<Print(); + // We will ignore keeps0 + std::vector keeps0; + UpdateFactoryManager(paramList, ParameterList(), *defaultManager, 0 /*levelID*/, keeps0); - // Create level specific factory managers - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - // Note, that originally if there were no level specific parameters, we - // simply copied the defaultManager However, with the introduction of - // levelID to UpdateFactoryManager (required for reuse), we can no longer - // guarantee that the kept variables are the same for each level even if - // dependency structure does not change. 
- RCP levelManager = rcp(new FactoryManager(*defaultManager)); - levelManager->SetVerbLevel(defaultManager->GetVerbLevel()); - - std::vector keeps; - if (paramList.isSublist("level " + toString(levelID))) { - // We do this so the parameters on the level get flagged correctly as "used" - ParameterList& levelList = paramList.sublist("level " + toString(levelID), true/*mustAlreadyExist*/); - UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); - - } else { - ParameterList levelList; - UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); - } + // std::cout<<"*** Default Manager ***"<Print(); - this->keep_[levelID] = keeps; - this->AddFactoryManager(levelID, 1, levelManager); + // Create level specific factory managers + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + // Note, that originally if there were no level specific parameters, we + // simply copied the defaultManager However, with the introduction of + // levelID to UpdateFactoryManager (required for reuse), we can no longer + // guarantee that the kept variables are the same for each level even if + // dependency structure does not change. + RCP levelManager = rcp(new FactoryManager(*defaultManager)); + levelManager->SetVerbLevel(defaultManager->GetVerbLevel()); - // std::cout<<"*** Level "<Print(); + std::vector keeps; + if (paramList.isSublist("level " + toString(levelID))) { + // We do this so the parameters on the level get flagged correctly as "used" + ParameterList& levelList = paramList.sublist("level " + toString(levelID), true /*mustAlreadyExist*/); + UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); + } else { + ParameterList levelList; + UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); } - // FIXME: parameters passed to packages, like Ifpack2, are not touched by us, resulting in "[unused]" flag - // being displayed. 
On the other hand, we don't want to simply iterate through them touching. I don't know - // what a good solution looks like - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print initial parameters", bool, true)) - this->GetOStream(static_cast(Runtime1), 0) << paramList << std::endl; + this->keep_[levelID] = keeps; + this->AddFactoryManager(levelID, 1, levelManager); - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print unused parameters", bool, true)) { - // Check unused parameters - ParameterList unusedParamList; + // std::cout<<"*** Level "<Print(); + } - // Check for unused parameters that aren't lists - for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) { - const ParameterEntry& entry = paramList.entry(it); + // FIXME: parameters passed to packages, like Ifpack2, are not touched by us, resulting in "[unused]" flag + // being displayed. On the other hand, we don't want to simply iterate through them touching. I don't know + // what a good solution looks like + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print initial parameters", bool, true)) + this->GetOStream(static_cast(Runtime1), 0) << paramList << std::endl; - if (!entry.isList() && !entry.isUsed()) - unusedParamList.setEntry(paramList.name(it), entry); - } + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print unused parameters", bool, true)) { + // Check unused parameters + ParameterList unusedParamList; - // Check for unused parameters in level-specific sublists - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + // Check for unused parameters that aren't lists + for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) { + const ParameterEntry& entry = paramList.entry(it); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (!entry.isList() && !entry.isUsed()) + 
unusedParamList.setEntry(paramList.name(it), entry); + } - for (ParameterList::ConstIterator itr = levelList.begin(); itr != levelList.end(); ++itr) { - const ParameterEntry& entry = levelList.entry(itr); + // Check for unused parameters in level-specific sublists + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (!entry.isList() && !entry.isUsed()) - unusedParamList.sublist(levelStr).setEntry(levelList.name(itr), entry); - } - } - } + if (paramList.isSublist(levelStr)) { + const ParameterList& levelList = paramList.sublist(levelStr); - if (unusedParamList.numParams() > 0) { - std::ostringstream unusedParamsStream; - int indent = 4; - unusedParamList.print(unusedParamsStream, indent); + for (ParameterList::ConstIterator itr = levelList.begin(); itr != levelList.end(); ++itr) { + const ParameterEntry& entry = levelList.entry(itr); - this->GetOStream(Warnings1) << "The following parameters were not used:\n" << unusedParamsStream.str() << std::endl; + if (!entry.isList() && !entry.isUsed()) + unusedParamList.sublist(levelStr).setEntry(levelList.name(itr), entry); + } } } - VerboseObject::SetDefaultVerbLevel(oldVerbLevel); + if (unusedParamList.numParams() > 0) { + std::ostringstream unusedParamsStream; + int indent = 4; + unusedParamList.print(unusedParamsStream, indent); + this->GetOStream(Warnings1) << "The following parameters were not used:\n" + << unusedParamsStream.str() << std::endl; + } } + VerboseObject::SetDefaultVerbLevel(oldVerbLevel); +} - // ===================================================================================================== - // ==================================== UpdateFactoryManager =========================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager(ParameterList& paramList, const ParameterList& 
defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - // NOTE: Factory::SetParameterList must be called prior to Factory::SetFactory, as - // SetParameterList sets default values for non mentioned parameters, including factories +// ===================================================================================================== +// ==================================== UpdateFactoryManager =========================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { + // NOTE: Factory::SetParameterList must be called prior to Factory::SetFactory, as + // SetParameterList sets default values for non mentioned parameters, including factories - using strings = std::unordered_set; + using strings = std::unordered_set; - // shortcut - if (paramList.numParams() == 0 && defaultList.numParams() > 0) - paramList = ParameterList(defaultList); + // shortcut + if (paramList.numParams() == 0 && defaultList.numParams() > 0) + paramList = ParameterList(defaultList); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - TEUCHOS_TEST_FOR_EXCEPTION(strings({"none", "tP", "RP", "emin", "RAP", "full", "S"}).count(reuseType) == 0, - Exceptions::RuntimeError, "Unknown \"reuse: type\" value: \"" << reuseType << "\". Please consult User's Guide."); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + TEUCHOS_TEST_FOR_EXCEPTION(strings({"none", "tP", "RP", "emin", "RAP", "full", "S"}).count(reuseType) == 0, + Exceptions::RuntimeError, "Unknown \"reuse: type\" value: \"" << reuseType << "\". 
Please consult User's Guide."); - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - TEUCHOS_TEST_FOR_EXCEPTION(strings({"unsmoothed", "sa", "pg", "emin", "matlab", "pcoarsen","classical","smoothed reitzinger","unsmoothed reitzinger","replicate","combine"}).count(multigridAlgo) == 0, - Exceptions::RuntimeError, "Unknown \"multigrid algorithm\" value: \"" << multigridAlgo << "\". Please consult User's Guide."); + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); + TEUCHOS_TEST_FOR_EXCEPTION(strings({"unsmoothed", "sa", "pg", "emin", "matlab", "pcoarsen", "classical", "smoothed reitzinger", "unsmoothed reitzinger", "replicate", "combine"}).count(multigridAlgo) == 0, + Exceptions::RuntimeError, "Unknown \"multigrid algorithm\" value: \"" << multigridAlgo << "\". Please consult User's Guide."); #ifndef HAVE_MUELU_MATLAB - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "matlab", Exceptions::RuntimeError, - "Cannot use matlab for multigrid algorithm - MueLu was not configured with MATLAB support."); + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "matlab", Exceptions::RuntimeError, + "Cannot use matlab for multigrid algorithm - MueLu was not configured with MATLAB support."); #endif #ifndef HAVE_MUELU_INTREPID2 - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pcoarsen", Exceptions::RuntimeError, - "Cannot use IntrepidPCoarsen prolongator factory - MueLu was not configured with Intrepid support."); + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pcoarsen", Exceptions::RuntimeError, + "Cannot use IntrepidPCoarsen prolongator factory - MueLu was not configured with Intrepid support."); #endif - // Only some combinations of reuse and multigrid algorithms are tested, all - // other are considered invalid at the moment - if (reuseType == "none" || reuseType == "S" || reuseType == "RP" || reuseType == "RAP") { - // This works for all kinds of multigrid algorithms - - } else if (reuseType == 
"tP" && (multigridAlgo != "sa" && multigridAlgo != "unsmoothed")) { - reuseType = "none"; - this->GetOStream(Warnings0) << "Ignoring \"tP\" reuse option as it is only compatible with \"sa\", " - "or \"unsmoothed\" multigrid algorithms" << std::endl; - - } else if (reuseType == "emin" && multigridAlgo != "emin") { - reuseType = "none"; - this->GetOStream(Warnings0) << "Ignoring \"emin\" reuse option it is only compatible with " - "\"emin\" multigrid algorithm" << std::endl; - } - - // == Non-serializable data === - // Check both the parameter and the type - bool have_userP = false; - if (paramList.isParameter("P") && !paramList.get >("P").is_null()) - have_userP = true; + // Only some combinations of reuse and multigrid algorithms are tested, all + // other are considered invalid at the moment + if (reuseType == "none" || reuseType == "S" || reuseType == "RP" || reuseType == "RAP") { + // This works for all kinds of multigrid algorithms + + } else if (reuseType == "tP" && (multigridAlgo != "sa" && multigridAlgo != "unsmoothed")) { + reuseType = "none"; + this->GetOStream(Warnings0) << "Ignoring \"tP\" reuse option as it is only compatible with \"sa\", " + "or \"unsmoothed\" multigrid algorithms" + << std::endl; + + } else if (reuseType == "emin" && multigridAlgo != "emin") { + reuseType = "none"; + this->GetOStream(Warnings0) << "Ignoring \"emin\" reuse option it is only compatible with " + "\"emin\" multigrid algorithm" + << std::endl; + } - // === Coarse solver === - UpdateFactoryManager_CoarseSolvers(paramList, defaultList, manager, levelID, keeps); + // == Non-serializable data === + // Check both the parameter and the type + bool have_userP = false; + if (paramList.isParameter("P") && !paramList.get >("P").is_null()) + have_userP = true; + + // === Coarse solver === + UpdateFactoryManager_CoarseSolvers(paramList, defaultList, manager, levelID, keeps); + + // == Smoothers == + UpdateFactoryManager_Smoothers(paramList, defaultList, manager, levelID, keeps); + + 
// === BlockNumber === + if (levelID == 0) + UpdateFactoryManager_BlockNumber(paramList, defaultList, manager, levelID, keeps); + + // === Aggregation === + if (multigridAlgo == "unsmoothed reitzinger" || multigridAlgo == "smoothed reitzinger") + UpdateFactoryManager_Reitzinger(paramList, defaultList, manager, levelID, keeps); + else + UpdateFactoryManager_Aggregation_TentativeP(paramList, defaultList, manager, levelID, keeps); + + // === Nullspace === + RCP nullSpaceFactory; // Cache thcAN is guy for the combination of semi-coarsening & repartitioning + UpdateFactoryManager_Nullspace(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); + + // === Prolongation === + // NOTE: None of the UpdateFactoryManager routines called here check the + // multigridAlgo. This is intentional, to allow for reuse of components + // underneath. Thus, the multigridAlgo was checked in the beginning of the + // function. + if (have_userP) { + // User prolongator + manager.SetFactory("P", NoFactory::getRCP()); + + } else if (multigridAlgo == "unsmoothed" || multigridAlgo == "unsmoothed reitzinger") { + // Unsmoothed aggregation + manager.SetFactory("P", manager.GetFactory("Ptent")); + + } else if (multigridAlgo == "classical") { + // Classical AMG + manager.SetFactory("P", manager.GetFactory("Ptent")); + + } else if (multigridAlgo == "sa" || multigridAlgo == "smoothed reitzinger") { + // Smoothed aggregation + UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - // == Smoothers == - UpdateFactoryManager_Smoothers(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "emin") { + // Energy minimization + UpdateFactoryManager_Emin(paramList, defaultList, manager, levelID, keeps); - // === BlockNumber === - if(levelID == 0) - UpdateFactoryManager_BlockNumber(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "replicate") { + UpdateFactoryManager_Replicate(paramList, defaultList, manager, levelID, 
keeps); - // === Aggregation === - if(multigridAlgo == "unsmoothed reitzinger" || multigridAlgo == "smoothed reitzinger") - UpdateFactoryManager_Reitzinger(paramList, defaultList, manager, levelID, keeps); - else - UpdateFactoryManager_Aggregation_TentativeP(paramList, defaultList, manager, levelID, keeps); - - // === Nullspace === - RCP nullSpaceFactory; // Cache thcAN is guy for the combination of semi-coarsening & repartitioning - UpdateFactoryManager_Nullspace(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); - - // === Prolongation === - // NOTE: None of the UpdateFactoryManager routines called here check the - // multigridAlgo. This is intentional, to allow for reuse of components - // underneath. Thus, the multigridAlgo was checked in the beginning of the - // function. - if (have_userP) { - // User prolongator - manager.SetFactory("P", NoFactory::getRCP()); - - } else if (multigridAlgo == "unsmoothed" || multigridAlgo == "unsmoothed reitzinger") { - // Unsmoothed aggregation - manager.SetFactory("P", manager.GetFactory("Ptent")); - - } else if (multigridAlgo == "classical") { - // Classical AMG - manager.SetFactory("P", manager.GetFactory("Ptent")); - - } else if (multigridAlgo == "sa" || multigridAlgo == "smoothed reitzinger") { - // Smoothed aggregation - UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - - } else if (multigridAlgo == "emin") { - // Energy minimization - UpdateFactoryManager_Emin(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "combine") { + UpdateFactoryManager_Combine(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "replicate") { - UpdateFactoryManager_Replicate(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "pg") { + // Petrov-Galerkin + UpdateFactoryManager_PG(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "combine") { - UpdateFactoryManager_Combine(paramList, 
defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "matlab") { + // Matlab Coarsneing + UpdateFactoryManager_Matlab(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "pg") { - // Petrov-Galerkin - UpdateFactoryManager_PG(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "pcoarsen") { + // P-Coarsening + UpdateFactoryManager_PCoarsen(paramList, defaultList, manager, levelID, keeps); + } - } else if (multigridAlgo == "matlab") { - // Matlab Coarsneing - UpdateFactoryManager_Matlab(paramList, defaultList, manager, levelID, keeps); + // === Semi-coarsening === + UpdateFactoryManager_SemiCoarsen(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "pcoarsen") { - // P-Coarsening - UpdateFactoryManager_PCoarsen(paramList, defaultList, manager, levelID, keeps); - } + // === Restriction === + UpdateFactoryManager_Restriction(paramList, defaultList, manager, levelID, keeps); - // === Semi-coarsening === - UpdateFactoryManager_SemiCoarsen(paramList, defaultList, manager, levelID, keeps); + // === RAP === + UpdateFactoryManager_RAP(paramList, defaultList, manager, levelID, keeps); - // === Restriction === - UpdateFactoryManager_Restriction(paramList, defaultList, manager, levelID, keeps); + // == BlockNumber Transfer == + UpdateFactoryManager_LocalOrdinalTransfer("BlockNumber", multigridAlgo, paramList, defaultList, manager, levelID, keeps); - // === RAP === - UpdateFactoryManager_RAP(paramList, defaultList, manager, levelID, keeps); + // === Coordinates === + UpdateFactoryManager_Coordinates(paramList, defaultList, manager, levelID, keeps); - // == BlockNumber Transfer == - UpdateFactoryManager_LocalOrdinalTransfer("BlockNumber",multigridAlgo,paramList,defaultList,manager,levelID,keeps); + // === Pre-Repartition Keeps for Reuse === + if ((reuseType == "RP" || reuseType == "RAP" || reuseType == "full") && levelID) + keeps.push_back(keep_pair("Nullspace", 
manager.GetFactory("Nullspace").get())); + if (reuseType == "RP" && levelID) { + keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); + if (!this->implicitTranspose_) + keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); + } + if ((reuseType == "tP" || reuseType == "RP" || reuseType == "emin") && useCoordinates_ && levelID) + keeps.push_back(keep_pair("Coordinates", manager.GetFactory("Coordinates").get())); - // === Coordinates === - UpdateFactoryManager_Coordinates(paramList, defaultList, manager, levelID, keeps); + // === Repartitioning === + UpdateFactoryManager_Repartition(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); - // === Pre-Repartition Keeps for Reuse === - if ((reuseType == "RP" || reuseType == "RAP" || reuseType == "full") && levelID) - keeps.push_back(keep_pair("Nullspace", manager.GetFactory("Nullspace").get())); + // === Lower precision transfers === + UpdateFactoryManager_LowPrecision(paramList, defaultList, manager, levelID, keeps); - if (reuseType == "RP" && levelID) { - keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); - if (!this->implicitTranspose_) - keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); - } - if ((reuseType == "tP" || reuseType == "RP" || reuseType == "emin") && useCoordinates_ && levelID) - keeps.push_back(keep_pair("Coordinates", manager.GetFactory("Coordinates").get())); + // === Final Keeps for Reuse === + if ((reuseType == "RAP" || reuseType == "full") && levelID) { + keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); + if (!this->implicitTranspose_) + keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); + keeps.push_back(keep_pair("A", manager.GetFactory("A").get())); + } - // === Repartitioning === - UpdateFactoryManager_Repartition(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); + // In case you ever want to inspect the FactoryManager as it is generated for each level + /*std::cout<<"*** Factory Manager on 
level "< +void ParameterListInterpreter:: + UpdateFactoryManager_Smoothers(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + bool useMaxAbsDiagonalScaling = false; + if (defaultList.isParameter("sa: use rowsumabs diagonal scaling")) + useMaxAbsDiagonalScaling = defaultList.get("sa: use rowsumabs diagonal scaling"); + + // === Smoothing === + // FIXME: should custom smoother check default list too? + bool isCustomSmoother = + paramList.isParameter("smoother: pre or post") || + paramList.isParameter("smoother: type") || paramList.isParameter("smoother: pre type") || paramList.isParameter("smoother: post type") || + paramList.isSublist("smoother: params") || paramList.isSublist("smoother: pre params") || paramList.isSublist("smoother: post params") || + paramList.isParameter("smoother: sweeps") || paramList.isParameter("smoother: pre sweeps") || paramList.isParameter("smoother: post sweeps") || + paramList.isParameter("smoother: overlap") || paramList.isParameter("smoother: pre overlap") || paramList.isParameter("smoother: post overlap"); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: pre or post", std::string, PreOrPost); + if (PreOrPost == "none") { + manager.SetFactory("Smoother", Teuchos::null); + + } else if (isCustomSmoother) { + // FIXME: get default values from the factory + // NOTE: none of the smoothers at the moment use parameter validation framework, so we + // cannot get the default values from it. 
+#define TEST_MUTUALLY_EXCLUSIVE(arg1, arg2) \ + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter(#arg1) && paramList.isParameter(#arg2), \ + Exceptions::InvalidArgument, "You cannot specify both \"" #arg1 "\" and \"" #arg2 "\""); +#define TEST_MUTUALLY_EXCLUSIVE_S(arg1, arg2) \ + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist(#arg1) && paramList.isSublist(#arg2), \ + Exceptions::InvalidArgument, "You cannot specify both \"" #arg1 "\" and \"" #arg2 "\""); + + TEST_MUTUALLY_EXCLUSIVE("smoother: type", "smoother: pre type"); + TEST_MUTUALLY_EXCLUSIVE("smoother: type", "smoother: post type"); + TEST_MUTUALLY_EXCLUSIVE("smoother: sweeps", "smoother: pre sweeps"); + TEST_MUTUALLY_EXCLUSIVE("smoother: sweeps", "smoother: post sweeps"); + TEST_MUTUALLY_EXCLUSIVE("smoother: overlap", "smoother: pre overlap"); + TEST_MUTUALLY_EXCLUSIVE("smoother: overlap", "smoother: post overlap"); + TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: pre params"); + TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: post params"); + TEUCHOS_TEST_FOR_EXCEPTION(PreOrPost == "both" && (paramList.isParameter("smoother: pre type") != paramList.isParameter("smoother: post type")), + Exceptions::InvalidArgument, "You must specify both \"smoother: pre type\" and \"smoother: post type\""); + + // Default values + int overlap = 0; + ParameterList defaultSmootherParams; + defaultSmootherParams.set("relaxation: type", "Symmetric Gauss-Seidel"); + defaultSmootherParams.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); + defaultSmootherParams.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); + + RCP preSmoother = Teuchos::null, postSmoother = Teuchos::null; + std::string preSmootherType, postSmootherType; + ParameterList preSmootherParams, postSmootherParams; + + if (paramList.isParameter("smoother: overlap")) + overlap = paramList.get("smoother: overlap"); + + if (PreOrPost == "pre" || PreOrPost == "both") { + if (paramList.isParameter("smoother: pre type")) { + 
preSmootherType = paramList.get("smoother: pre type"); + } else { + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, preSmootherTypeTmp); + preSmootherType = preSmootherTypeTmp; + } + if (paramList.isParameter("smoother: pre overlap")) + overlap = paramList.get("smoother: pre overlap"); - // === Lower precision transfers === - UpdateFactoryManager_LowPrecision(paramList, defaultList, manager, levelID, keeps); + if (paramList.isSublist("smoother: pre params")) + preSmootherParams = paramList.sublist("smoother: pre params"); + else if (paramList.isSublist("smoother: params")) + preSmootherParams = paramList.sublist("smoother: params"); + else if (defaultList.isSublist("smoother: params")) + preSmootherParams = defaultList.sublist("smoother: params"); + else if (preSmootherType == "RELAXATION") + preSmootherParams = defaultSmootherParams; - // === Final Keeps for Reuse === - if ((reuseType == "RAP" || reuseType == "full") && levelID) { - keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); - if (!this->implicitTranspose_) - keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); - keeps.push_back(keep_pair("A", manager.GetFactory("A").get())); - } + if (preSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) + preSmootherParams.set("chebyshev: use rowsumabs diagonal scaling", true); - // In case you ever want to inspect the FactoryManager as it is generated for each level - /*std::cout<<"*** Factory Manager on level "< - void ParameterListInterpreter:: - UpdateFactoryManager_Smoothers(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - bool useMaxAbsDiagonalScaling = false; - if (defaultList.isParameter("sa: use rowsumabs diagonal scaling")) - 
useMaxAbsDiagonalScaling = defaultList.get("sa: use rowsumabs diagonal scaling"); - - // === Smoothing === - // FIXME: should custom smoother check default list too? - bool isCustomSmoother = - paramList.isParameter("smoother: pre or post") || - paramList.isParameter("smoother: type") || paramList.isParameter("smoother: pre type") || paramList.isParameter("smoother: post type") || - paramList.isSublist ("smoother: params") || paramList.isSublist ("smoother: pre params") || paramList.isSublist ("smoother: post params") || - paramList.isParameter("smoother: sweeps") || paramList.isParameter("smoother: pre sweeps") || paramList.isParameter("smoother: post sweeps") || - paramList.isParameter("smoother: overlap") || paramList.isParameter("smoother: pre overlap") || paramList.isParameter("smoother: post overlap"); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: pre or post", std::string, PreOrPost); - if (PreOrPost == "none") { - manager.SetFactory("Smoother", Teuchos::null); - - } else if (isCustomSmoother) { - // FIXME: get default values from the factory - // NOTE: none of the smoothers at the moment use parameter validation framework, so we - // cannot get the default values from it. 
- #define TEST_MUTUALLY_EXCLUSIVE(arg1,arg2) \ - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter(#arg1) && paramList.isParameter(#arg2), \ - Exceptions::InvalidArgument, "You cannot specify both \""#arg1"\" and \""#arg2"\""); - #define TEST_MUTUALLY_EXCLUSIVE_S(arg1,arg2) \ - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist(#arg1) && paramList.isSublist(#arg2), \ - Exceptions::InvalidArgument, "You cannot specify both \""#arg1"\" and \""#arg2"\""); - - TEST_MUTUALLY_EXCLUSIVE ("smoother: type", "smoother: pre type"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: type", "smoother: post type"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: sweeps", "smoother: pre sweeps"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: sweeps", "smoother: post sweeps"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: overlap", "smoother: pre overlap"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: overlap", "smoother: post overlap"); - TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: pre params"); - TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: post params"); - TEUCHOS_TEST_FOR_EXCEPTION(PreOrPost == "both" && (paramList.isParameter("smoother: pre type") != paramList.isParameter("smoother: post type")), - Exceptions::InvalidArgument, "You must specify both \"smoother: pre type\" and \"smoother: post type\""); - - // Default values - int overlap = 0; - ParameterList defaultSmootherParams; - defaultSmootherParams.set("relaxation: type", "Symmetric Gauss-Seidel"); - defaultSmootherParams.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); - defaultSmootherParams.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); - - RCP preSmoother = Teuchos::null, postSmoother = Teuchos::null; - std::string preSmootherType, postSmootherType; - ParameterList preSmootherParams, postSmootherParams; - - if (paramList.isParameter("smoother: overlap")) - overlap = paramList.get("smoother: overlap"); - - if (PreOrPost == "pre" || PreOrPost == "both") { - if (paramList.isParameter("smoother: pre type")) { - 
preSmootherType = paramList.get("smoother: pre type"); - } else { - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, preSmootherTypeTmp); - preSmootherType = preSmootherTypeTmp; - } - if (paramList.isParameter("smoother: pre overlap")) - overlap = paramList.get("smoother: pre overlap"); - - if (paramList.isSublist("smoother: pre params")) - preSmootherParams = paramList.sublist("smoother: pre params"); - else if (paramList.isSublist("smoother: params")) - preSmootherParams = paramList.sublist("smoother: params"); - else if (defaultList.isSublist("smoother: params")) - preSmootherParams = defaultList.sublist("smoother: params"); - else if (preSmootherType == "RELAXATION") - preSmootherParams = defaultSmootherParams; - - if (preSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) - preSmootherParams.set("chebyshev: use rowsumabs diagonal scaling",true); - - #ifdef HAVE_MUELU_INTREPID2 - // Propagate P-coarsening for Topo smoothing - if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && - defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID < (int)pcoarsen_schedule.size()) { - // Topo info for P-Coarsening - auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - preSmootherParams.set("pcoarsen: hi basis", lo); - } - } - #endif - - #ifdef HAVE_MUELU_MATLAB - if (preSmootherType == "matlab") - preSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(preSmootherParams)))); - else - #endif - preSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(preSmootherType, preSmootherParams, overlap)))); - } - - if (PreOrPost == "post" || PreOrPost == "both") { - if 
(paramList.isParameter("smoother: post type")) - postSmootherType = paramList.get("smoother: post type"); - else { - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, postSmootherTypeTmp); - postSmootherType = postSmootherTypeTmp; - } - - if (paramList.isSublist("smoother: post params")) - postSmootherParams = paramList.sublist("smoother: post params"); - else if (paramList.isSublist("smoother: params")) - postSmootherParams = paramList.sublist("smoother: params"); - else if (defaultList.isSublist("smoother: params")) - postSmootherParams = defaultList.sublist("smoother: params"); - else if (postSmootherType == "RELAXATION") - postSmootherParams = defaultSmootherParams; - if (paramList.isParameter("smoother: post overlap")) - overlap = paramList.get("smoother: post overlap"); - - if (postSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) - postSmootherParams.set("chebyshev: use rowsumabs diagonal scaling",true); - - if (postSmootherType == preSmootherType && areSame(preSmootherParams, postSmootherParams)) - postSmoother = preSmoother; - else { - #ifdef HAVE_MUELU_INTREPID2 - // Propagate P-coarsening for Topo smoothing - if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && - defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList,"pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID < (int)pcoarsen_schedule.size()) { - // Topo info for P-Coarsening - auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - postSmootherParams.set("pcoarsen: hi basis", lo); - } - } - #endif - - #ifdef HAVE_MUELU_MATLAB - if (postSmootherType == "matlab") - postSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(postSmootherParams)))); - else - 
#endif - postSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(postSmootherType, postSmootherParams, overlap)))); - } - } - - if (preSmoother == postSmoother) - manager.SetFactory("Smoother", preSmoother); - else { - manager.SetFactory("PreSmoother", preSmoother); - manager.SetFactory("PostSmoother", postSmoother); - } - } - - // The first clause is not necessary, but it is here for clarity Smoothers - // are reused if smoother explicitly said to reuse them, or if any other - // reuse option is enabled - bool reuseSmoothers = (reuseType == "S" || reuseType != "none"); - if (reuseSmoothers) { - auto preSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PreSmoother"))); - - if (preSmootherFactory != Teuchos::null) { - ParameterList postSmootherFactoryParams; - postSmootherFactoryParams.set("keep smoother data", true); - preSmootherFactory->SetParameterList(postSmootherFactoryParams); - - keeps.push_back(keep_pair("PreSmoother data", preSmootherFactory.get())); - } - - auto postSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PostSmoother"))); - if (postSmootherFactory != Teuchos::null) { - ParameterList postSmootherFactoryParams; - postSmootherFactoryParams.set("keep smoother data", true); - postSmootherFactory->SetParameterList(postSmootherFactoryParams); - - keeps.push_back(keep_pair("PostSmoother data", postSmootherFactory.get())); - } - - auto coarseFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("CoarseSolver"))); - if (coarseFactory != Teuchos::null) { - ParameterList coarseFactoryParams; - coarseFactoryParams.set("keep smoother data", true); - coarseFactory->SetParameterList(coarseFactoryParams); - - keeps.push_back(keep_pair("PreSmoother data", coarseFactory.get())); - } - } - - if ((reuseType == "RAP" && levelID) || (reuseType == "full")) { - // The difference between "RAP" and "full" is keeping smoothers. 
However, - // as in both cases we keep coarse matrices, we do not need to update - // coarse smoothers. On the other hand, if a user changes fine level - // matrix, "RAP" would update the fine level smoother, while "full" would - // not - keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("PreSmoother") .get())); - keeps.push_back(keep_pair("PostSmoother", manager.GetFactory("PostSmoother").get())); - - // We do keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get()) - // as the coarse solver factory is in fact a smoothing factory, so the - // only pieces of data it generates are PreSmoother and PostSmoother - keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get())); - } - } - - // ===================================================================================================== - // ====================================== Coarse Solvers =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_CoarseSolvers(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const - { - // FIXME: should custom coarse solver check default list too? - bool isCustomCoarseSolver = - paramList.isParameter("coarse: type") || - paramList.isParameter("coarse: params"); - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "coarse: type", std::string, "none")) { - manager.SetFactory("CoarseSolver", Teuchos::null); - - } else if (isCustomCoarseSolver) { - // FIXME: get default values from the factory - // NOTE: none of the smoothers at the moment use parameter validation framework, so we - // cannot get the default values from it. 
- MUELU_SET_VAR_2LIST(paramList, defaultList, "coarse: type", std::string, coarseType); - - int overlap = 0; - if (paramList.isParameter("coarse: overlap")) - overlap = paramList.get("coarse: overlap"); - - ParameterList coarseParams; - if (paramList.isSublist("coarse: params")) - coarseParams = paramList.sublist("coarse: params"); - else if (defaultList.isSublist("coarse: params")) - coarseParams = defaultList.sublist("coarse: params"); - - using strings = std::unordered_set; - - RCP coarseSmoother; - // TODO: this is not a proper place to check. If we consider direct solver to be a special - // case of smoother, we would like to unify Amesos and Ifpack2 smoothers in src/Smoothers, and - // have a single factory responsible for those. Then, this check would belong there. - if (strings({"RELAXATION", "CHEBYSHEV", "ILUT", "ILU", "RILUK", "SCHWARZ", "Amesos", - "BLOCK RELAXATION", "BLOCK_RELAXATION", "BLOCKRELAXATION" , - "SPARSE BLOCK RELAXATION", "SPARSE_BLOCK_RELAXATION", "SPARSEBLOCKRELAXATION", - "LINESMOOTHING_BANDEDRELAXATION", "LINESMOOTHING_BANDED_RELAXATION", "LINESMOOTHING_BANDED RELAXATION", - "LINESMOOTHING_TRIDIRELAXATION", "LINESMOOTHING_TRIDI_RELAXATION", "LINESMOOTHING_TRIDI RELAXATION", - "LINESMOOTHING_TRIDIAGONALRELAXATION", "LINESMOOTHING_TRIDIAGONAL_RELAXATION", "LINESMOOTHING_TRIDIAGONAL RELAXATION", - "TOPOLOGICAL", "FAST_ILU", "FAST_IC", "FAST_ILDL","HIPTMAIR"}).count(coarseType)) { - coarseSmoother = rcp(new TrilinosSmoother(coarseType, coarseParams, overlap)); - } else { - #ifdef HAVE_MUELU_MATLAB - if (coarseType == "matlab") - coarseSmoother = rcp(new MatlabSmoother(coarseParams)); - else - #endif - coarseSmoother = rcp(new DirectSolver(coarseType, coarseParams)); - } - - manager.SetFactory("CoarseSolver", rcp(new SmootherFactory(coarseSmoother))); - } - } - - - // ===================================================================================================== - // ========================================= 
TentativeP================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Reitzinger(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - ParameterList rParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: enable", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: constant column sums", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, rParams); - - RCP rFactory = rcp(new ReitzingerPFactory()); - rFactory->SetParameterList(rParams); - - // These are all going to be user provided, so NoFactory - rFactory->SetFactory("Pnodal", NoFactory::getRCP()); - rFactory->SetFactory("NodeAggMatrix", NoFactory::getRCP()); - //rFactory->SetFactory("NodeMatrix", NoFactory::getRCP()); - - if(levelID > 1) - rFactory->SetFactory("D0", this->GetFactoryManager(levelID-1)->GetFactory("D0")); - else - rFactory->SetFactory("D0", NoFactory::getRCP()); - - manager.SetFactory("Ptent", rFactory); - manager.SetFactory("D0", rFactory); - manager.SetFactory("InPlaceMap", rFactory); - - } - - // ===================================================================================================== - // ========================================= TentativeP================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Aggregation_TentativeP(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - using strings = 
std::unordered_set; - - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "aggregation: type", std::string, aggType); - TEUCHOS_TEST_FOR_EXCEPTION(!strings({"uncoupled", "coupled", "brick", "matlab","notay","classical"}).count(aggType), - Exceptions::RuntimeError, "Unknown aggregation algorithm: \"" << aggType << "\". Please consult User's Guide."); - - - // Only doing this for classical because otherwise, the gold tests get broken badly - RCP amalgFact; - if(aggType == "classical") { - amalgFact = rcp(new AmalgamationFactory()); - manager.SetFactory("UnAmalgamationInfo",amalgFact); - } - - // Aggregation graph - RCP dropFactory; - - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "matlab")) { - #ifdef HAVE_MUELU_MATLAB - dropFactory = rcp(new SingleLevelMatlabFactory()); - ParameterList socParams = paramList.sublist("strength-of-connection: params"); - dropFactory->SetParameterList(socParams); - #else - throw std::runtime_error("Cannot use MATLAB evolutionary strength-of-connection - MueLu was not configured with MATLAB support."); - #endif - } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "unsupported vector smoothing")) { - dropFactory = rcp(new MueLu::SmooVecCoalesceDropFactory()); - ParameterList dropParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of random vectors", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of times to pre or post smooth", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: penalty parameters", Teuchos::Array, 
dropParams); - dropFactory->SetParameterList(dropParams); - } - else { - MUELU_KOKKOS_FACTORY_NO_DECL(dropFactory, CoalesceDropFactory, CoalesceDropFactory_kokkos); - ParameterList dropParams; - if (!rcp_dynamic_cast(dropFactory).is_null()) - dropParams.set("lightweight wrap", true); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: row sum drop tol", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: use ml scaling of drop tol", bool, dropParams); - - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: greedy Dirichlet", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian algo", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical algo", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian directional weights",Teuchos::Array,dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring: localize color graph", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: dropping may create Dirichlet", bool, dropParams); - if (useKokkos_) { - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"filtered matrix: reuse eigenvalue", bool, dropParams); - } - - if(!amalgFact.is_null()) - dropFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - if(dropParams.isParameter("aggregation: drop scheme")) { - std::string drop_scheme = dropParams.get("aggregation: drop scheme"); - if(drop_scheme == "block diagonal colored signed classical") - manager.SetFactory("Coloring Graph",dropFactory); - if (drop_scheme.find("block diagonal") != std::string::npos || drop_scheme == "signed classical") { - if(levelID > 0) - dropFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID-1)->GetFactory("BlockNumber")); - else - dropFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - } - } - - dropFactory->SetParameterList(dropParams); - } - manager.SetFactory("Graph", dropFactory); - - - // Aggregation scheme - #ifndef HAVE_MUELU_MATLAB - if (aggType == "matlab") - throw std::runtime_error("Cannot use MATLAB aggregation - MueLu was not configured with MATLAB support."); - #endif - RCP aggFactory; - if (aggType == "uncoupled") { - MUELU_KOKKOS_FACTORY_NO_DECL(aggFactory, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: mode", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: min agg size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max agg size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max selected neighbors", int, aggParams); - if(useKokkos_) { - //if not using kokkos refactor Uncoupled, there is no algorithm option (always Serial) - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase 1 algorithm", std::string, aggParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, aggParams); - } - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 1", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2a", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2b", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 3", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase1", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2a", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2b", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase2a agg factor", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: preserve Dirichlet points", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: error on nodes with no on-rank neighbors", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase3 avoid singletons", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); - aggFactory->SetParameterList(aggParams); - // make sure that the aggregation factory has all necessary data - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - // aggFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - } else if (aggType == "brick") { - aggFactory = rcp(new BrickAggregationFactory()); - 
ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x Dirichlet", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y Dirichlet", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z Dirichlet", bool, aggParams); - aggFactory->SetParameterList(aggParams); - - // Unlike other factories, BrickAggregationFactory makes the Graph/DofsPerNode itself - manager.SetFactory("Graph", aggFactory); - manager.SetFactory("DofsPerNode", aggFactory); - manager.SetFactory("Filtering", aggFactory); - if (levelID > 1) { - // We check for levelID > 0, as in the interpreter aggFactory for - // levelID really corresponds to level 0. 
Managers are clunky, as they - // contain factories for two different levels - aggFactory->SetFactory("Coordinates", this->GetFactoryManager(levelID-1)->GetFactory("Coordinates")); +#ifdef HAVE_MUELU_INTREPID2 + // Propagate P-coarsening for Topo smoothing + if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && + defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID < (int)pcoarsen_schedule.size()) { + // Topo info for P-Coarsening + auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + preSmootherParams.set("pcoarsen: hi basis", lo); + } } +#endif + +#ifdef HAVE_MUELU_MATLAB + if (preSmootherType == "matlab") + preSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(preSmootherParams)))); + else +#endif + preSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(preSmootherType, preSmootherParams, overlap)))); } - else if (aggType == "classical") { - // Map and coloring - RCP mapFact = rcp(new ClassicalMapFactory()); - ParameterList mapParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, mapParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, mapParams); - - ParameterList tempParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, tempParams); - std::string drop_algo = tempParams.get("aggregation: drop scheme"); - if(drop_algo == "block diagonal colored signed classical") { - mapParams.set("aggregation: coloring: use color graph",true); - mapFact->SetFactory("Coloring Graph", manager.GetFactory("Coloring Graph")); + if (PreOrPost == 
"post" || PreOrPost == "both") { + if (paramList.isParameter("smoother: post type")) + postSmootherType = paramList.get("smoother: post type"); + else { + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, postSmootherTypeTmp); + postSmootherType = postSmootherTypeTmp; } - mapFact->SetParameterList(mapParams); - mapFact->SetFactory("Graph", manager.GetFactory("Graph")); - mapFact->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - manager.SetFactory("FC Splitting", mapFact); - manager.SetFactory("CoarseMap", mapFact); - - - aggFactory = rcp(new ClassicalPFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical scheme", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, aggParams); - aggFactory->SetParameterList(aggParams); - aggFactory->SetFactory("FC Splitting",manager.GetFactory("FC Splitting")); - aggFactory->SetFactory("CoarseMap",manager.GetFactory("CoarseMap")); - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - - if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") { - if(levelID > 0) - aggFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID-1)->GetFactory("BlockNumber")); - else - aggFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - } - - // Now we short-circuit, because we neither need nor want TentativePFactory here - manager.SetFactory("Ptent", aggFactory); - manager.SetFactory("P Graph", aggFactory); + if (paramList.isSublist("smoother: post params")) + postSmootherParams = paramList.sublist("smoother: post params"); + else if (paramList.isSublist("smoother: params")) + postSmootherParams = paramList.sublist("smoother: params"); + else if (defaultList.isSublist("smoother: params")) + postSmootherParams = 
defaultList.sublist("smoother: params"); + else if (postSmootherType == "RELAXATION") + postSmootherParams = defaultSmootherParams; + if (paramList.isParameter("smoother: post overlap")) + overlap = paramList.get("smoother: post overlap"); + + if (postSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) + postSmootherParams.set("chebyshev: use rowsumabs diagonal scaling", true); + + if (postSmootherType == preSmootherType && areSame(preSmootherParams, postSmootherParams)) + postSmoother = preSmoother; + else { +#ifdef HAVE_MUELU_INTREPID2 + // Propagate P-coarsening for Topo smoothing + if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && + defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID < (int)pcoarsen_schedule.size()) { + // Topo info for P-Coarsening + auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + postSmootherParams.set("pcoarsen: hi basis", lo); + } + } +#endif - if (reuseType == "tP" && levelID) { - // keeps.push_back(keep_pair("Nullspace", Ptent.get())); - keeps.push_back(keep_pair("Ptent",aggFactory.get())); - } - return; - } - else if (aggType == "notay") { - aggFactory = rcp(new NotayAggregationFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: tie threshold", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities",bool, aggParams); - aggFactory->SetParameterList(aggParams); - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - } #ifdef HAVE_MUELU_MATLAB - else if(aggType == "matlab") { - ParameterList aggParams = paramList.sublist("aggregation: params"); - aggFactory = rcp(new SingleLevelMatlabFactory()); - aggFactory->SetParameterList(aggParams); - } + if (postSmootherType == "matlab") + postSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(postSmootherParams)))); + else #endif + postSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(postSmootherType, postSmootherParams, overlap)))); + } + } + if (preSmoother == postSmoother) + manager.SetFactory("Smoother", preSmoother); + else { + manager.SetFactory("PreSmoother", preSmoother); + manager.SetFactory("PostSmoother", postSmoother); + } + } + // The first clause is not necessary, but it is here for clarity Smoothers + // are reused if smoother explicitly said to reuse them, or if any other + // reuse option is enabled + bool reuseSmoothers = (reuseType == "S" || reuseType != "none"); + if (reuseSmoothers) { + auto preSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PreSmoother"))); - manager.SetFactory("Aggregates", aggFactory); + if (preSmootherFactory != Teuchos::null) { + ParameterList postSmootherFactoryParams; + postSmootherFactoryParams.set("keep smoother data", true); + preSmootherFactory->SetParameterList(postSmootherFactoryParams); - // Coarse map - RCP coarseMap = rcp(new CoarseMapFactory()); - coarseMap->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - manager.SetFactory("CoarseMap", coarseMap); + keeps.push_back(keep_pair("PreSmoother data", preSmootherFactory.get())); + } - // Aggregate qualities - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate 
qualities", bool, true)) { - RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); - ParameterList aggQualityParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file base", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array,aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); - aggQualityFact->SetParameterList(aggQualityParams); - manager.SetFactory("AggregateQualities", aggQualityFact); + auto postSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PostSmoother"))); + if (postSmootherFactory != Teuchos::null) { + ParameterList postSmootherFactoryParams; + postSmootherFactoryParams.set("keep smoother data", true); + postSmootherFactory->SetParameterList(postSmootherFactoryParams); - assert(aggType == "uncoupled"); - aggFactory->SetFactory("AggregateQualities", aggQualityFact); + keeps.push_back(keep_pair("PostSmoother data", postSmootherFactory.get())); } + auto coarseFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("CoarseSolver"))); + if (coarseFactory != Teuchos::null) { + ParameterList coarseFactoryParams; + coarseFactoryParams.set("keep smoother data", true); + 
coarseFactory->SetParameterList(coarseFactoryParams); - // Tentative P - MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); - ParameterList ptentParams; - if (paramList.isSublist("matrixmatrix: kernel params")) - ptentParams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - ptentParams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, ptentParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: build coarse coordinates", bool, ptentParams); - Ptent->SetParameterList(ptentParams); - Ptent->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - Ptent->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - manager.SetFactory("Ptent", Ptent); - - if (reuseType == "tP" && levelID) { - keeps.push_back(keep_pair("Nullspace", Ptent.get())); - keeps.push_back(keep_pair("P", Ptent.get())); + keeps.push_back(keep_pair("PreSmoother data", coarseFactory.get())); } } - // ===================================================================================================== - // ============================================ RAP ==================================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_RAP(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - if (paramList.isParameter("A") && !paramList.get >("A").is_null()) { - // We have user matrix A - manager.SetFactory("A", NoFactory::getRCP()); - return; - } + if ((reuseType == "RAP" && levelID) || (reuseType == "full")) { + // The difference between "RAP" and "full" is keeping smoothers. 
However, + // as in both cases we keep coarse matrices, we do not need to update + // coarse smoothers. On the other hand, if a user changes fine level + // matrix, "RAP" would update the fine level smoother, while "full" would + // not + keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("PreSmoother").get())); + keeps.push_back(keep_pair("PostSmoother", manager.GetFactory("PostSmoother").get())); + + // We do keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get()) + // as the coarse solver factory is in fact a smoothing factory, so the + // only pieces of data it generates are PreSmoother and PostSmoother + keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get())); + } +} + +// ===================================================================================================== +// ====================================== Coarse Solvers =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_CoarseSolvers(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const { + // FIXME: should custom coarse solver check default list too? + bool isCustomCoarseSolver = + paramList.isParameter("coarse: type") || + paramList.isParameter("coarse: params"); + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "coarse: type", std::string, "none")) { + manager.SetFactory("CoarseSolver", Teuchos::null); + + } else if (isCustomCoarseSolver) { + // FIXME: get default values from the factory + // NOTE: none of the smoothers at the moment use parameter validation framework, so we + // cannot get the default values from it. 
+ MUELU_SET_VAR_2LIST(paramList, defaultList, "coarse: type", std::string, coarseType); + + int overlap = 0; + if (paramList.isParameter("coarse: overlap")) + overlap = paramList.get("coarse: overlap"); + + ParameterList coarseParams; + if (paramList.isSublist("coarse: params")) + coarseParams = paramList.sublist("coarse: params"); + else if (defaultList.isSublist("coarse: params")) + coarseParams = defaultList.sublist("coarse: params"); - ParameterList RAPparams; - - RCP RAP; - RCP RAPs; - // Allow for Galerkin or shifted RAP - // FIXME: Should this not be some form of MUELU_SET_VAR_2LIST? - std::string alg = paramList.get("rap: algorithm", "galerkin"); - if (alg == "shift" || alg == "non-galerkin") { - RAPs = rcp(new RAPShiftFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift", double, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift diagonal M", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift low storage", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift array", Teuchos::Array, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: cfl array", Teuchos::Array, RAPparams); + using strings = std::unordered_set; + RCP coarseSmoother; + // TODO: this is not a proper place to check. If we consider direct solver to be a special + // case of smoother, we would like to unify Amesos and Ifpack2 smoothers in src/Smoothers, and + // have a single factory responsible for those. Then, this check would belong there. 
+ if (strings({"RELAXATION", "CHEBYSHEV", "ILUT", "ILU", "RILUK", "SCHWARZ", "Amesos", + "BLOCK RELAXATION", "BLOCK_RELAXATION", "BLOCKRELAXATION", + "SPARSE BLOCK RELAXATION", "SPARSE_BLOCK_RELAXATION", "SPARSEBLOCKRELAXATION", + "LINESMOOTHING_BANDEDRELAXATION", "LINESMOOTHING_BANDED_RELAXATION", "LINESMOOTHING_BANDED RELAXATION", + "LINESMOOTHING_TRIDIRELAXATION", "LINESMOOTHING_TRIDI_RELAXATION", "LINESMOOTHING_TRIDI RELAXATION", + "LINESMOOTHING_TRIDIAGONALRELAXATION", "LINESMOOTHING_TRIDIAGONAL_RELAXATION", "LINESMOOTHING_TRIDIAGONAL RELAXATION", + "TOPOLOGICAL", "FAST_ILU", "FAST_IC", "FAST_ILDL", "HIPTMAIR"}) + .count(coarseType)) { + coarseSmoother = rcp(new TrilinosSmoother(coarseType, coarseParams, overlap)); } else { - RAP = rcp(new RAPFactory()); +#ifdef HAVE_MUELU_MATLAB + if (coarseType == "matlab") + coarseSmoother = rcp(new MatlabSmoother(coarseParams)); + else +#endif + coarseSmoother = rcp(new DirectSolver(coarseType, coarseParams)); } - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: relative diagonal floor", Teuchos::Array, RAPparams); - - if (paramList.isSublist("matrixmatrix: kernel params")) - RAPparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - RAPparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "transpose: use implicit", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals threshold", double, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals replacement", Scalar, RAPparams); - - // if "rap: triple product" has not been set and algorithm is "unsmoothed" switch triple product on - if 
(!paramList.isParameter("rap: triple product") && - paramList.isType("multigrid algorithm") && - paramList.get("multigrid algorithm") == "unsmoothed") - paramList.set("rap: triple product", true); - else - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: triple product", bool, RAPparams); + manager.SetFactory("CoarseSolver", rcp(new SmootherFactory(coarseSmoother))); + } +} + +// ===================================================================================================== +// ========================================= TentativeP================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Reitzinger(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + ParameterList rParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: enable", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: constant column sums", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, rParams); + + RCP rFactory = rcp(new ReitzingerPFactory()); + rFactory->SetParameterList(rParams); + + // These are all going to be user provided, so NoFactory + rFactory->SetFactory("Pnodal", NoFactory::getRCP()); + rFactory->SetFactory("NodeAggMatrix", NoFactory::getRCP()); + // rFactory->SetFactory("NodeMatrix", NoFactory::getRCP()); + + if (levelID > 1) + rFactory->SetFactory("D0", this->GetFactoryManager(levelID - 1)->GetFactory("D0")); + else + rFactory->SetFactory("D0", NoFactory::getRCP()); + + manager.SetFactory("Ptent", rFactory); + manager.SetFactory("D0", rFactory); + manager.SetFactory("InPlaceMap", rFactory); +} + +// 
===================================================================================================== +// ========================================= TentativeP================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Aggregation_TentativeP(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + using strings = std::unordered_set; + + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "aggregation: type", std::string, aggType); + TEUCHOS_TEST_FOR_EXCEPTION(!strings({"uncoupled", "coupled", "brick", "matlab", "notay", "classical"}).count(aggType), + Exceptions::RuntimeError, "Unknown aggregation algorithm: \"" << aggType << "\". Please consult User's Guide."); + + // Only doing this for classical because otherwise, the gold tests get broken badly + RCP amalgFact; + if (aggType == "classical") { + amalgFact = rcp(new AmalgamationFactory()); + manager.SetFactory("UnAmalgamationInfo", amalgFact); + } - try { - if (paramList.isParameter("aggregation: allow empty prolongator columns")) { - RAPparams.set("CheckMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); - RAPparams.set("RepairMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); - } - else if (defaultList.isParameter("aggregation: allow empty prolongator columns")) { - RAPparams.set("CheckMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); - RAPparams.set("RepairMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); - } + // Aggregation graph + RCP dropFactory; - } catch (Teuchos::Exceptions::InvalidParameterType&) { - TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, 
Teuchos::Exceptions::InvalidParameterType, - "Error: parameter \"aggregation: allow empty prolongator columns\" must be of type " << Teuchos::TypeNameTraits::name()); + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "matlab")) { +#ifdef HAVE_MUELU_MATLAB + dropFactory = rcp(new SingleLevelMatlabFactory()); + ParameterList socParams = paramList.sublist("strength-of-connection: params"); + dropFactory->SetParameterList(socParams); +#else + throw std::runtime_error("Cannot use MATLAB evolutionary strength-of-connection - MueLu was not configured with MATLAB support."); +#endif + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "unsupported vector smoothing")) { + dropFactory = rcp(new MueLu::SmooVecCoalesceDropFactory()); + ParameterList dropParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of random vectors", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of times to pre or post smooth", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: penalty parameters", Teuchos::Array, dropParams); + dropFactory->SetParameterList(dropParams); + } else { + MUELU_KOKKOS_FACTORY_NO_DECL(dropFactory, CoalesceDropFactory, CoalesceDropFactory_kokkos); + ParameterList dropParams; + if (!rcp_dynamic_cast(dropFactory).is_null()) + dropParams.set("lightweight wrap", true); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: row sum drop tol", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: use ml scaling of drop tol", bool, dropParams); + + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: greedy Dirichlet", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian algo", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical algo", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian directional weights", Teuchos::Array, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring: localize color graph", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: dropping may create Dirichlet", bool, dropParams); + if (useKokkos_) { + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, dropParams); } - if (!RAP.is_null()) { - RAP->SetParameterList(RAPparams); - RAP->SetFactory("P", manager.GetFactory("P")); - } else { - RAPs->SetParameterList(RAPparams); - RAPs->SetFactory("P", manager.GetFactory("P")); - } + if (!amalgFact.is_null()) + dropFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - if (!this->implicitTranspose_) { - if (!RAP.is_null()) - RAP->SetFactory("R", manager.GetFactory("R")); - else - RAPs->SetFactory("R", manager.GetFactory("R")); 
+ if (dropParams.isParameter("aggregation: drop scheme")) { + std::string drop_scheme = dropParams.get("aggregation: drop scheme"); + if (drop_scheme == "block diagonal colored signed classical") + manager.SetFactory("Coloring Graph", dropFactory); + if (drop_scheme.find("block diagonal") != std::string::npos || drop_scheme == "signed classical") { + if (levelID > 0) + dropFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID - 1)->GetFactory("BlockNumber")); + else + dropFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); + } } - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: export visualization data", bool, true)) { - RCP aggExport = rcp(new AggregationExportFactory()); - ParameterList aggExportParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output filename", std::string, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: agg style", std::string, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: iter", int, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: time step", int, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: fine graph edges", bool, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: coarse graph edges", bool, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: build colormap", bool, aggExportParams); - aggExport->SetParameterList(aggExportParams); - aggExport->SetFactory("DofsPerNode", manager.GetFactory("DofsPerNode")); + dropFactory->SetParameterList(dropParams); + } + manager.SetFactory("Graph", dropFactory); - if (!RAP.is_null()) - RAP->AddTransferFactory(aggExport); +// Aggregation scheme +#ifndef HAVE_MUELU_MATLAB + if (aggType == "matlab") + throw 
std::runtime_error("Cannot use MATLAB aggregation - MueLu was not configured with MATLAB support."); +#endif + RCP aggFactory; + if (aggType == "uncoupled") { + MUELU_KOKKOS_FACTORY_NO_DECL(aggFactory, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: mode", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: min agg size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max agg size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max selected neighbors", int, aggParams); + if (useKokkos_) { + // if not using kokkos refactor Uncoupled, there is no algorithm option (always Serial) + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase 1 algorithm", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, aggParams); + } + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 1", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2a", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2b", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 3", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase1", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2a", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML 
phase2b", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase2a agg factor", double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: preserve Dirichlet points", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: error on nodes with no on-rank neighbors", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase3 avoid singletons", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); + aggFactory->SetParameterList(aggParams); + // make sure that the aggregation factory has all necessary data + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + // aggFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + + } else if (aggType == "brick") { + aggFactory = rcp(new BrickAggregationFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x Dirichlet", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y Dirichlet", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z Dirichlet", bool, aggParams); + aggFactory->SetParameterList(aggParams); + + // Unlike other factories, BrickAggregationFactory makes the Graph/DofsPerNode itself + manager.SetFactory("Graph", aggFactory); + manager.SetFactory("DofsPerNode", aggFactory); + manager.SetFactory("Filtering", aggFactory); + if (levelID > 
1) { + // We check for levelID > 0, as in the interpreter aggFactory for + // levelID really corresponds to level 0. Managers are clunky, as they + // contain factories for two different levels + aggFactory->SetFactory("Coordinates", this->GetFactoryManager(levelID - 1)->GetFactory("Coordinates")); + } + } else if (aggType == "classical") { + // Map and coloring + RCP mapFact = rcp(new ClassicalMapFactory()); + ParameterList mapParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, mapParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, mapParams); + + ParameterList tempParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, tempParams); + std::string drop_algo = tempParams.get("aggregation: drop scheme"); + if (drop_algo == "block diagonal colored signed classical") { + mapParams.set("aggregation: coloring: use color graph", true); + mapFact->SetFactory("Coloring Graph", manager.GetFactory("Coloring Graph")); + } + mapFact->SetParameterList(mapParams); + mapFact->SetFactory("Graph", manager.GetFactory("Graph")); + mapFact->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + + manager.SetFactory("FC Splitting", mapFact); + manager.SetFactory("CoarseMap", mapFact); + + aggFactory = rcp(new ClassicalPFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical scheme", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, aggParams); + aggFactory->SetParameterList(aggParams); + aggFactory->SetFactory("FC Splitting", manager.GetFactory("FC Splitting")); + aggFactory->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + + if 
(drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") { + if (levelID > 0) + aggFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID - 1)->GetFactory("BlockNumber")); else - RAPs->AddTransferFactory(aggExport); + aggFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); } - if (!RAP.is_null()) - manager.SetFactory("A", RAP); - else - manager.SetFactory("A", RAPs); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); - bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); + // Now we short-circuit, because we neither need nor want TentativePFactory here + manager.SetFactory("Ptent", aggFactory); + manager.SetFactory("P Graph", aggFactory); - if (reuseType == "RP" || (reuseType == "tP" && !filteringChangesMatrix)) { - if (!RAP.is_null()) { - keeps.push_back(keep_pair("AP reuse data", RAP.get())); - keeps.push_back(keep_pair("RAP reuse data", RAP.get())); - - } else { - keeps.push_back(keep_pair("AP reuse data", RAPs.get())); - keeps.push_back(keep_pair("RAP reuse data", RAPs.get())); - } + if (reuseType == "tP" && levelID) { + // keeps.push_back(keep_pair("Nullspace", Ptent.get())); + keeps.push_back(keep_pair("Ptent", aggFactory.get())); } + return; + } else if (aggType == "notay") { + aggFactory = rcp(new NotayAggregationFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: tie threshold", double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); + aggFactory->SetParameterList(aggParams); + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + } +#ifdef HAVE_MUELU_MATLAB + else if (aggType == "matlab") { + ParameterList aggParams = paramList.sublist("aggregation: params"); + aggFactory = rcp(new SingleLevelMatlabFactory()); + aggFactory->SetParameterList(aggParams); } +#endif - // ===================================================================================================== - // ======================================= Coordinates ================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Coordinates(ParameterList& paramList, const ParameterList& /* defaultList */, - FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const - { - bool have_userCO = false; - if (paramList.isParameter("Coordinates") && !paramList.get >("Coordinates").is_null()) - have_userCO = true; + manager.SetFactory("Aggregates", aggFactory); + + // Coarse map + RCP coarseMap = rcp(new CoarseMapFactory()); + coarseMap->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + manager.SetFactory("CoarseMap", coarseMap); + + // Aggregate qualities + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, true)) { + RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); + ParameterList aggQualityParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "aggregate qualities: file base", std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); + aggQualityFact->SetParameterList(aggQualityParams); + manager.SetFactory("AggregateQualities", aggQualityFact); + + assert(aggType == "uncoupled"); + aggFactory->SetFactory("AggregateQualities", aggQualityFact); + } - if (useCoordinates_) { - if (have_userCO) { - manager.SetFactory("Coordinates", NoFactory::getRCP()); + // Tentative P + MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); + ParameterList ptentParams; + if (paramList.isSublist("matrixmatrix: kernel params")) + ptentParams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + ptentParams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, ptentParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: build coarse coordinates", bool, ptentParams); + Ptent->SetParameterList(ptentParams); + Ptent->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + Ptent->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + manager.SetFactory("Ptent", Ptent); + + if (reuseType == "tP" && levelID) { + keeps.push_back(keep_pair("Nullspace", 
Ptent.get())); + keeps.push_back(keep_pair("P", Ptent.get())); + } +} + +// ===================================================================================================== +// ============================================ RAP ==================================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_RAP(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { + if (paramList.isParameter("A") && !paramList.get >("A").is_null()) { + // We have user matrix A + manager.SetFactory("A", NoFactory::getRCP()); + return; + } - } else { - RCP coords = rcp(new CoordinatesTransferFactory()); - coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - manager.SetFactory("Coordinates", coords); - - auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - if (!RAP.is_null()) { - RAP->AddTransferFactory(manager.GetFactory("Coordinates")); - } else { - auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - RAPs->AddTransferFactory(manager.GetFactory("Coordinates")); - } - } + ParameterList RAPparams; + + RCP RAP; + RCP RAPs; + // Allow for Galerkin or shifted RAP + // FIXME: Should this not be some form of MUELU_SET_VAR_2LIST? 
+ std::string alg = paramList.get("rap: algorithm", "galerkin"); + if (alg == "shift" || alg == "non-galerkin") { + RAPs = rcp(new RAPShiftFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift", double, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift diagonal M", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift low storage", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift array", Teuchos::Array, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: cfl array", Teuchos::Array, RAPparams); + + } else { + RAP = rcp(new RAPFactory()); + } + + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: relative diagonal floor", Teuchos::Array, RAPparams); + + if (paramList.isSublist("matrixmatrix: kernel params")) + RAPparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + RAPparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "transpose: use implicit", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals threshold", double, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals replacement", Scalar, RAPparams); + + // if "rap: triple product" has not been set and algorithm is "unsmoothed" switch triple product on + if (!paramList.isParameter("rap: triple product") && + paramList.isType("multigrid algorithm") && + paramList.get("multigrid algorithm") == "unsmoothed") + paramList.set("rap: triple product", true); + else + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: triple product", bool, 
RAPparams); + + try { + if (paramList.isParameter("aggregation: allow empty prolongator columns")) { + RAPparams.set("CheckMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); + RAPparams.set("RepairMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); + } else if (defaultList.isParameter("aggregation: allow empty prolongator columns")) { + RAPparams.set("CheckMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); + RAPparams.set("RepairMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); } + + } catch (Teuchos::Exceptions::InvalidParameterType&) { + TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, + "Error: parameter \"aggregation: allow empty prolongator columns\" must be of type " << Teuchos::TypeNameTraits::name()); } - // ===================================================================================================== - // ================================= LocalOrdinalTransfer ============================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_LocalOrdinalTransfer(const std::string & VarName, const std::string &multigridAlgo,ParameterList& paramList, const ParameterList& /* defaultList */, - FactoryManager& manager, int levelID, std::vector& /* keeps */) const - { - // NOTE: You would think this would be levelID > 0, but you'd be wrong, since the FactoryManager is basically - // offset by a level from the things which actually do the work. 
- if (useBlockNumber_ && (levelID > 0)) { - auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - if (!RAP.is_null() || !RAPs.is_null()) { - RCP fact = rcp(new LocalOrdinalTransferFactory(VarName,multigridAlgo)); - if(multigridAlgo == "classical") - fact->SetFactory("P Graph", manager.GetFactory("P Graph")); - else - fact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - fact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + if (!RAP.is_null()) { + RAP->SetParameterList(RAPparams); + RAP->SetFactory("P", manager.GetFactory("P")); + } else { + RAPs->SetParameterList(RAPparams); + RAPs->SetFactory("P", manager.GetFactory("P")); + } - fact->SetFactory(VarName, this->GetFactoryManager(levelID-1)->GetFactory(VarName)); + if (!this->implicitTranspose_) { + if (!RAP.is_null()) + RAP->SetFactory("R", manager.GetFactory("R")); + else + RAPs->SetFactory("R", manager.GetFactory("R")); + } - manager.SetFactory(VarName, fact); + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: export visualization data", bool, true)) { + RCP aggExport = rcp(new AggregationExportFactory()); + ParameterList aggExportParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output filename", std::string, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: agg style", std::string, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: iter", int, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: time step", int, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: fine graph edges", bool, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: coarse graph edges", bool, aggExportParams); + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: build colormap", bool, aggExportParams); + aggExport->SetParameterList(aggExportParams); + aggExport->SetFactory("DofsPerNode", manager.GetFactory("DofsPerNode")); - if (!RAP.is_null()) - RAP->AddTransferFactory(manager.GetFactory(VarName)); - else - RAPs->AddTransferFactory(manager.GetFactory(VarName)); - } - } + if (!RAP.is_null()) + RAP->AddTransferFactory(aggExport); + else + RAPs->AddTransferFactory(aggExport); } + if (!RAP.is_null()) + manager.SetFactory("A", RAP); + else + manager.SetFactory("A", RAPs); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); + bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); - // ====================================================================================================== - // ====================================== BlockNumber ================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_BlockNumber(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID , std::vector& keeps) const - { - if(useBlockNumber_) { - ParameterList myParams; - RCP fact = rcp(new InitialBlockNumberFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, myParams); - fact->SetParameterList(myParams); - manager.SetFactory("BlockNumber",fact); - } + if (reuseType == "RP" || (reuseType == "tP" && !filteringChangesMatrix)) { + if (!RAP.is_null()) { + keeps.push_back(keep_pair("AP reuse data", RAP.get())); + keeps.push_back(keep_pair("RAP reuse data", RAP.get())); + } else { + 
keeps.push_back(keep_pair("AP reuse data", RAPs.get())); + keeps.push_back(keep_pair("RAP reuse data", RAPs.get())); + } } +} + +// ===================================================================================================== +// ======================================= Coordinates ================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Coordinates(ParameterList& paramList, const ParameterList& /* defaultList */, + FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const { + bool have_userCO = false; + if (paramList.isParameter("Coordinates") && !paramList.get >("Coordinates").is_null()) + have_userCO = true; + + if (useCoordinates_) { + if (have_userCO) { + manager.SetFactory("Coordinates", NoFactory::getRCP()); + } else { + RCP coords = rcp(new CoordinatesTransferFactory()); + coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + manager.SetFactory("Coordinates", coords); - // ===================================================================================================== - // =========================================== Restriction ============================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Restriction(ParameterList& paramList, const ParameterList& defaultList , FactoryManager& manager, - int levelID, std::vector& /* keeps */) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - bool have_userR = false; - if (paramList.isParameter("R") && !paramList.get >("R").is_null()) - have_userR = true; - - // === Restriction === - RCP R; - if (!this->implicitTranspose_) { - 
MUELU_SET_VAR_2LIST(paramList, defaultList, "problem: symmetric", bool, isSymmetric); - - if (isSymmetric == false && (multigridAlgo == "unsmoothed" || multigridAlgo == "emin")) { - this->GetOStream(Warnings0) << - "Switching \"problem: symmetric\" parameter to symmetric as multigrid algorithm. " << - multigridAlgo << " is primarily supposed to be used for symmetric problems.\n\n" << - "Please note: if you are using \"unsmoothed\" transfer operators the \"problem: symmetric\" parameter " << - "has no real mathematical meaning, i.e. you can use it for non-symmetric\n" << - "problems, too. With \"problem: symmetric\"=\"symmetric\" you can use implicit transpose for building " << - "the restriction operators which may drastically reduce the amount of consumed memory." << std::endl; - isSymmetric = true; + auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + if (!RAP.is_null()) { + RAP->AddTransferFactory(manager.GetFactory("Coordinates")); + } else { + auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + RAPs->AddTransferFactory(manager.GetFactory("Coordinates")); } - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pg" && isSymmetric == true, Exceptions::RuntimeError, - "Petrov-Galerkin smoothed transfer operators are only allowed for non-symmetric problems: Set \"problem: symmetric\" to false!\n" \ - "While PG smoothed transfer operators generally would also work for symmetric problems this is an unusual use case. 
" \ - "You can use the factory-based xml interface though if you need PG-AMG for symmetric problems."); + } + } +} + +// ===================================================================================================== +// ================================= LocalOrdinalTransfer ============================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, ParameterList& paramList, const ParameterList& /* defaultList */, + FactoryManager& manager, int levelID, std::vector& /* keeps */) const { + // NOTE: You would think this would be levelID > 0, but you'd be wrong, since the FactoryManager is basically + // offset by a level from the things which actually do the work. + if (useBlockNumber_ && (levelID > 0)) { + auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + if (!RAP.is_null() || !RAPs.is_null()) { + RCP fact = rcp(new LocalOrdinalTransferFactory(VarName, multigridAlgo)); + if (multigridAlgo == "classical") + fact->SetFactory("P Graph", manager.GetFactory("P Graph")); + else + fact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + fact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - if (have_userR) { - manager.SetFactory("R", NoFactory::getRCP()); - } else { - if (isSymmetric) R = rcp(new TransPFactory()); - else R = rcp(new GenericRFactory()); + fact->SetFactory(VarName, this->GetFactoryManager(levelID - 1)->GetFactory(VarName)); - R->SetFactory("P", manager.GetFactory("P")); - manager.SetFactory("R", R); - } + manager.SetFactory(VarName, fact); - } else { - manager.SetFactory("R", Teuchos::null); + if (!RAP.is_null()) + RAP->AddTransferFactory(manager.GetFactory(VarName)); + else + 
RAPs->AddTransferFactory(manager.GetFactory(VarName)); } - - // === Restriction: Nullspace Scaling === - if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { - RCP tentPFactory = rcp(new TentativePFactory()); - Teuchos::ParameterList tentPlist; - tentPlist.set("Nullspace name","Scaled Nullspace"); - tentPFactory->SetParameterList(tentPlist); - tentPFactory->SetFactory("Aggregates",manager.GetFactory("Aggregates")); - tentPFactory->SetFactory("CoarseMap",manager.GetFactory("CoarseMap")); - - if(R.is_null()) R = rcp(new TransPFactory()); - R->SetFactory("P",tentPFactory); + } +} + +// ====================================================================================================== +// ====================================== BlockNumber ================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_BlockNumber(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + if (useBlockNumber_) { + ParameterList myParams; + RCP fact = rcp(new InitialBlockNumberFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, myParams); + fact->SetParameterList(myParams); + manager.SetFactory("BlockNumber", fact); + } +} + +// ===================================================================================================== +// =========================================== Restriction ============================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Restriction(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& /* 
keeps */) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); + bool have_userR = false; + if (paramList.isParameter("R") && !paramList.get >("R").is_null()) + have_userR = true; + + // === Restriction === + RCP R; + if (!this->implicitTranspose_) { + MUELU_SET_VAR_2LIST(paramList, defaultList, "problem: symmetric", bool, isSymmetric); + + if (isSymmetric == false && (multigridAlgo == "unsmoothed" || multigridAlgo == "emin")) { + this->GetOStream(Warnings0) << "Switching \"problem: symmetric\" parameter to symmetric as multigrid algorithm. " << multigridAlgo << " is primarily supposed to be used for symmetric problems.\n\n" + << "Please note: if you are using \"unsmoothed\" transfer operators the \"problem: symmetric\" parameter " + << "has no real mathematical meaning, i.e. you can use it for non-symmetric\n" + << "problems, too. With \"problem: symmetric\"=\"symmetric\" you can use implicit transpose for building " + << "the restriction operators which may drastically reduce the amount of consumed memory." << std::endl; + isSymmetric = true; } + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pg" && isSymmetric == true, Exceptions::RuntimeError, + "Petrov-Galerkin smoothed transfer operators are only allowed for non-symmetric problems: Set \"problem: symmetric\" to false!\n" + "While PG smoothed transfer operators generally would also work for symmetric problems this is an unusual use case. 
" + "You can use the factory-based xml interface though if you need PG-AMG for symmetric problems."); + if (have_userR) { + manager.SetFactory("R", NoFactory::getRCP()); + } else { + if (isSymmetric) + R = rcp(new TransPFactory()); + else + R = rcp(new GenericRFactory()); - } - - // ===================================================================================================== - // ========================================= Repartition =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Repartition(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const - { - // === Repartitioning === - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: enable", bool, enableRepart); - if (enableRepart) { -#if defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) // skip to the end, print warning, and turn off repartitioning if we don't have MPI and Zoltan/Zoltan2 - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, enableInPlace); - // Short summary of the issue: RebalanceTransferFactory shares ownership - // of "P" with SaPFactory, and therefore, changes the stored version. - // That means that if SaPFactory generated P, and stored it on the level, - // then after rebalancing the value in that storage changed. It goes - // against the concept of factories (I think), that every factory is - // responsible for its own objects, and they are immutable outside. - // - // In reuse, this is what happens: as we reuse Importer across setups, - // the order of factories changes, and coupled with shared ownership - // leads to problems. 
- // *First setup* - // SaP builds P [and stores it] - // TransP builds R [and stores it] - // RAP builds A [and stores it] - // RebalanceTransfer rebalances P [and changes the P stored by SaP] (*) - // RebalanceTransfer rebalances R - // RebalanceAc rebalances A - // *Second setup* ("RP" reuse) - // RebalanceTransfer rebalances P [which is incorrect due to (*)] - // RebalanceTransfer rebalances R - // RAP builds A [which is incorrect due to (*)] - // RebalanceAc rebalances A [which throws due to map inconsistency] - // ... - // *Second setup* ("tP" reuse) - // SaP builds P [and stores it] - // RebalanceTransfer rebalances P [and changes the P stored by SaP] (**) - // TransP builds R [which is incorrect due to (**)] - // RebalanceTransfer rebalances R - // ... - // - // Couple solutions to this: - // 1. [implemented] Requre "tP" and "PR" reuse to only be used with - // implicit rebalancing. - // 2. Do deep copy of P, and changed domain map and importer there. - // Need to investigate how expensive this is. - TEUCHOS_TEST_FOR_EXCEPTION(this->doPRrebalance_ && (reuseType == "tP" || reuseType == "RP"), Exceptions::InvalidArgument, - "Reuse types \"tP\" and \"PR\" require \"repartition: rebalance P and R\" set to \"false\""); - - // TEUCHOS_TEST_FOR_EXCEPTION(aggType == "brick", Exceptions::InvalidArgument, - // "Aggregation type \"brick\" requires \"repartition: enable\" set to \"false\""); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: partitioner", std::string, partName); - TEUCHOS_TEST_FOR_EXCEPTION(partName != "zoltan" && partName != "zoltan2", Exceptions::InvalidArgument, - "Invalid partitioner name: \"" << partName << "\". 
Valid options: \"zoltan\", \"zoltan2\""); - -# ifndef HAVE_MUELU_ZOLTAN - bool switched = false; - if (partName == "zoltan") { - this->GetOStream(Warnings0) << "Zoltan interface is not available, trying to switch to Zoltan2" << std::endl; - partName = "zoltan2"; - switched = true; - } -# else -# ifndef HAVE_MUELU_ZOLTAN2 - bool switched = false; -# endif // HAVE_MUELU_ZOLTAN2 -# endif // HAVE_MUELU_ZOLTAN - -# ifndef HAVE_MUELU_ZOLTAN2 - if (partName == "zoltan2" && !switched) { - this->GetOStream(Warnings0) << "Zoltan2 interface is not available, trying to switch to Zoltan" << std::endl; - partName = "zoltan"; - } -# endif // HAVE_MUELU_ZOLTAN2 - - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: node repartition level",int,nodeRepartitionLevel); - - // RepartitionHeuristic - auto repartheurFactory = rcp(new RepartitionHeuristicFactory()); - ParameterList repartheurParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node repartition level", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: start level", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per proc", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per proc", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per thread", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per thread", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: max imbalance", double, repartheurParams); - repartheurFactory->SetParameterList(repartheurParams); - repartheurFactory->SetFactory("A", manager.GetFactory("A")); - manager.SetFactory("number of partitions", repartheurFactory); - manager.SetFactory("repartition: heuristic target rows per process", repartheurFactory); - - // 
Partitioner - RCP partitioner; - if (levelID == nodeRepartitionLevel) { - // partitioner = rcp(new NodePartitionInterface()); - partitioner = rcp(new MueLu::NodePartitionInterface()); - ParameterList partParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node id" ,int,repartheurParams); - partitioner->SetParameterList(partParams); - partitioner->SetFactory("Node Comm", manager.GetFactory("Node Comm")); - } - else if (partName == "zoltan") { -# ifdef HAVE_MUELU_ZOLTAN - partitioner = rcp(new ZoltanInterface()); - // NOTE: ZoltanInterface ("zoltan") does not support external parameters through ParameterList -# else - throw Exceptions::RuntimeError("Zoltan interface is not available"); -# endif // HAVE_MUELU_ZOLTAN - } else if (partName == "zoltan2") { -# ifdef HAVE_MUELU_ZOLTAN2 - partitioner = rcp(new Zoltan2Interface()); - ParameterList partParams; - RCP partpartParams = rcp(new ParameterList(paramList.sublist("repartition: params", false))); - partParams.set("ParameterList", partpartParams); - partitioner->SetParameterList(partParams); - partitioner->SetFactory("repartition: heuristic target rows per process", - manager.GetFactory("repartition: heuristic target rows per process")); -# else - throw Exceptions::RuntimeError("Zoltan2 interface is not available"); -# endif // HAVE_MUELU_ZOLTAN2 - } + R->SetFactory("P", manager.GetFactory("P")); + manager.SetFactory("R", R); + } - partitioner->SetFactory("A", manager.GetFactory("A")); - partitioner->SetFactory("number of partitions", manager.GetFactory("number of partitions")); - if (useCoordinates_) - partitioner->SetFactory("Coordinates", manager.GetFactory("Coordinates")); - manager.SetFactory("Partition", partitioner); - - // Repartitioner - auto repartFactory = rcp(new RepartitionFactory()); - ParameterList repartParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: print partition distribution", bool, repartParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "repartition: remap parts", bool, repartParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap num values", int, repartParams); - repartFactory->SetParameterList(repartParams); - repartFactory->SetFactory("A", manager.GetFactory("A")); - repartFactory->SetFactory("number of partitions", manager.GetFactory("number of partitions")); - repartFactory->SetFactory("Partition", manager.GetFactory("Partition")); - manager.SetFactory("Importer", repartFactory); - if (reuseType != "none" && reuseType != "S" && levelID) - keeps.push_back(keep_pair("Importer", manager.GetFactory("Importer").get())); - - - if(enableInPlace) { - // Rebalanced A (in place) - // NOTE: This is for when we want to constrain repartitioning to match some other idea of what's going on. - // The major application is the (1,1) hierarchy in the Maxwell1 preconditioner. - auto newA = rcp(new RebalanceAcFactory()); - ParameterList rebAcParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, rebAcParams); - newA->SetParameterList(rebAcParams); - newA->SetFactory("A", manager.GetFactory("A")); - newA->SetFactory("InPlaceMap", manager.GetFactory("InPlaceMap")); - manager.SetFactory("A",newA); - } - else { - // Rebalanced A - auto newA = rcp(new RebalanceAcFactory()); - ParameterList rebAcParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); - newA->SetParameterList(rebAcParams); - newA->SetFactory("A", manager.GetFactory("A")); - newA->SetFactory("Importer", manager.GetFactory("Importer")); - manager.SetFactory("A", newA); - - // Rebalanced P - auto newP = rcp(new RebalanceTransferFactory()); - ParameterList newPparams; - newPparams.set("type", "Interpolation"); - if (changedPRrebalance_) - newPparams.set("repartition: rebalance 
P and R", this->doPRrebalance_); - if (changedPRViaCopyrebalance_) - newPparams.set("repartition: explicit via new copy rebalance P and R",true); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newPparams); - newP-> SetParameterList(newPparams); - newP-> SetFactory("Importer", manager.GetFactory("Importer")); - newP-> SetFactory("P", manager.GetFactory("P")); - if (!paramList.isParameter("semicoarsen: number of levels")) - newP->SetFactory("Nullspace", manager.GetFactory("Ptent")); - else - newP->SetFactory("Nullspace", manager.GetFactory("P")); // TogglePFactory - if (useCoordinates_) - newP-> SetFactory("Coordinates", manager.GetFactory("Coordinates")); - manager.SetFactory("P", newP); - if (useCoordinates_) - manager.SetFactory("Coordinates", newP); - if (useBlockNumber_ && (levelID > 0)) { - newP->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - manager.SetFactory("BlockNumber", newP); - } - - // Rebalanced R - auto newR = rcp(new RebalanceTransferFactory()); - ParameterList newRparams; - newRparams.set("type", "Restriction"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newRparams); - if (changedPRrebalance_) - newRparams.set("repartition: rebalance P and R", this->doPRrebalance_); - if (changedPRViaCopyrebalance_) - newPparams.set("repartition: explicit via new copy rebalance P and R",true); - if (changedImplicitTranspose_) - newRparams.set("transpose: use implicit", this->implicitTranspose_); - newR-> SetParameterList(newRparams); - newR-> SetFactory("Importer", manager.GetFactory("Importer")); - if (!this->implicitTranspose_) { - newR->SetFactory("R", manager.GetFactory("R")); - manager.SetFactory("R", newR); - } + } else { + manager.SetFactory("R", Teuchos::null); + } - // NOTE: the role of NullspaceFactory is to provide nullspace on the finest - // level if a user does not do that. 
For all other levels it simply passes - // nullspace from a real factory to whoever needs it. If we don't use - // repartitioning, that factory is "TentativePFactory"; if we do, it is - // "RebalanceTransferFactory". But we still have to have NullspaceFactory as - // the "Nullspace" of the manager - // NOTE: This really needs to be set on the *NullSpaceFactory*, not manager.get("Nullspace"). - ParameterList newNullparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); - nullSpaceFactory->SetFactory("Nullspace", newP); - nullSpaceFactory->SetParameterList(newNullparams); - } + // === Restriction: Nullspace Scaling === + if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { + RCP tentPFactory = rcp(new TentativePFactory()); + Teuchos::ParameterList tentPlist; + tentPlist.set("Nullspace name", "Scaled Nullspace"); + tentPFactory->SetParameterList(tentPlist); + tentPFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + tentPFactory->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + + if (R.is_null()) R = rcp(new TransPFactory()); + R->SetFactory("P", tentPFactory); + } +} + +// ===================================================================================================== +// ========================================= Repartition =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Repartition(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps, RCP& nullSpaceFactory) const { + // === Repartitioning === + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: enable", bool, enableRepart); + if 
(enableRepart) { +#if defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) // skip to the end, print warning, and turn off repartitioning if we don't have MPI and Zoltan/Zoltan2 + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, enableInPlace); + // Short summary of the issue: RebalanceTransferFactory shares ownership + // of "P" with SaPFactory, and therefore, changes the stored version. + // That means that if SaPFactory generated P, and stored it on the level, + // then after rebalancing the value in that storage changed. It goes + // against the concept of factories (I think), that every factory is + // responsible for its own objects, and they are immutable outside. + // + // In reuse, this is what happens: as we reuse Importer across setups, + // the order of factories changes, and coupled with shared ownership + // leads to problems. + // *First setup* + // SaP builds P [and stores it] + // TransP builds R [and stores it] + // RAP builds A [and stores it] + // RebalanceTransfer rebalances P [and changes the P stored by SaP] (*) + // RebalanceTransfer rebalances R + // RebalanceAc rebalances A + // *Second setup* ("RP" reuse) + // RebalanceTransfer rebalances P [which is incorrect due to (*)] + // RebalanceTransfer rebalances R + // RAP builds A [which is incorrect due to (*)] + // RebalanceAc rebalances A [which throws due to map inconsistency] + // ... + // *Second setup* ("tP" reuse) + // SaP builds P [and stores it] + // RebalanceTransfer rebalances P [and changes the P stored by SaP] (**) + // TransP builds R [which is incorrect due to (**)] + // RebalanceTransfer rebalances R + // ... + // + // Couple solutions to this: + // 1. [implemented] Requre "tP" and "PR" reuse to only be used with + // implicit rebalancing. + // 2. Do deep copy of P, and changed domain map and importer there. + // Need to investigate how expensive this is. 
+ TEUCHOS_TEST_FOR_EXCEPTION(this->doPRrebalance_ && (reuseType == "tP" || reuseType == "RP"), Exceptions::InvalidArgument, + "Reuse types \"tP\" and \"PR\" require \"repartition: rebalance P and R\" set to \"false\""); + + // TEUCHOS_TEST_FOR_EXCEPTION(aggType == "brick", Exceptions::InvalidArgument, + // "Aggregation type \"brick\" requires \"repartition: enable\" set to \"false\""); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: partitioner", std::string, partName); + TEUCHOS_TEST_FOR_EXCEPTION(partName != "zoltan" && partName != "zoltan2", Exceptions::InvalidArgument, + "Invalid partitioner name: \"" << partName << "\". Valid options: \"zoltan\", \"zoltan2\""); + +#ifndef HAVE_MUELU_ZOLTAN + bool switched = false; + if (partName == "zoltan") { + this->GetOStream(Warnings0) << "Zoltan interface is not available, trying to switch to Zoltan2" << std::endl; + partName = "zoltan2"; + switched = true; + } #else - paramList.set("repartition: enable",false); -# ifndef HAVE_MPI - this->GetOStream(Warnings0) << "No repartitioning available for a serial run\n"; -# else - this->GetOStream(Warnings0) << "Zoltan/Zoltan2 are unavailable for repartitioning\n"; -# endif // HAVE_MPI -#endif // defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) +#ifndef HAVE_MUELU_ZOLTAN2 + bool switched = false; +#endif // HAVE_MUELU_ZOLTAN2 +#endif // HAVE_MUELU_ZOLTAN + +#ifndef HAVE_MUELU_ZOLTAN2 + if (partName == "zoltan2" && !switched) { + this->GetOStream(Warnings0) << "Zoltan2 interface is not available, trying to switch to Zoltan" << std::endl; + partName = "zoltan"; + } +#endif // HAVE_MUELU_ZOLTAN2 + + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: node repartition level", int, nodeRepartitionLevel); + + // RepartitionHeuristic + auto repartheurFactory = rcp(new RepartitionHeuristicFactory()); + ParameterList repartheurParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node repartition level", int, 
repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: start level", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per proc", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per proc", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per thread", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per thread", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: max imbalance", double, repartheurParams); + repartheurFactory->SetParameterList(repartheurParams); + repartheurFactory->SetFactory("A", manager.GetFactory("A")); + manager.SetFactory("number of partitions", repartheurFactory); + manager.SetFactory("repartition: heuristic target rows per process", repartheurFactory); + + // Partitioner + RCP partitioner; + if (levelID == nodeRepartitionLevel) { + // partitioner = rcp(new NodePartitionInterface()); + partitioner = rcp(new MueLu::NodePartitionInterface()); + ParameterList partParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node id", int, repartheurParams); + partitioner->SetParameterList(partParams); + partitioner->SetFactory("Node Comm", manager.GetFactory("Node Comm")); + } else if (partName == "zoltan") { +#ifdef HAVE_MUELU_ZOLTAN + partitioner = rcp(new ZoltanInterface()); + // NOTE: ZoltanInterface ("zoltan") does not support external parameters through ParameterList +#else + throw Exceptions::RuntimeError("Zoltan interface is not available"); +#endif // HAVE_MUELU_ZOLTAN + } else if (partName == "zoltan2") { +#ifdef HAVE_MUELU_ZOLTAN2 + partitioner = rcp(new Zoltan2Interface()); + ParameterList partParams; + RCP partpartParams = rcp(new ParameterList(paramList.sublist("repartition: params", false))); + 
partParams.set("ParameterList", partpartParams); + partitioner->SetParameterList(partParams); + partitioner->SetFactory("repartition: heuristic target rows per process", + manager.GetFactory("repartition: heuristic target rows per process")); +#else + throw Exceptions::RuntimeError("Zoltan2 interface is not available"); +#endif // HAVE_MUELU_ZOLTAN2 } - } - - // ===================================================================================================== - // ========================================= Low precision transfers =================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "transfers: half precision", bool, enableLowPrecision); - if (enableLowPrecision) { - // Low precision P - auto newP = rcp(new LowPrecisionFactory()); + partitioner->SetFactory("A", manager.GetFactory("A")); + partitioner->SetFactory("number of partitions", manager.GetFactory("number of partitions")); + if (useCoordinates_) + partitioner->SetFactory("Coordinates", manager.GetFactory("Coordinates")); + manager.SetFactory("Partition", partitioner); + + // Repartitioner + auto repartFactory = rcp(new RepartitionFactory()); + ParameterList repartParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: print partition distribution", bool, repartParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap parts", bool, repartParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap num values", int, repartParams); + repartFactory->SetParameterList(repartParams); + repartFactory->SetFactory("A", manager.GetFactory("A")); + repartFactory->SetFactory("number of partitions", 
manager.GetFactory("number of partitions")); + repartFactory->SetFactory("Partition", manager.GetFactory("Partition")); + manager.SetFactory("Importer", repartFactory); + if (reuseType != "none" && reuseType != "S" && levelID) + keeps.push_back(keep_pair("Importer", manager.GetFactory("Importer").get())); + + if (enableInPlace) { + // Rebalanced A (in place) + // NOTE: This is for when we want to constrain repartitioning to match some other idea of what's going on. + // The major application is the (1,1) hierarchy in the Maxwell1 preconditioner. + auto newA = rcp(new RebalanceAcFactory()); + ParameterList rebAcParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, rebAcParams); + newA->SetParameterList(rebAcParams); + newA->SetFactory("A", manager.GetFactory("A")); + newA->SetFactory("InPlaceMap", manager.GetFactory("InPlaceMap")); + manager.SetFactory("A", newA); + } else { + // Rebalanced A + auto newA = rcp(new RebalanceAcFactory()); + ParameterList rebAcParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); + newA->SetParameterList(rebAcParams); + newA->SetFactory("A", manager.GetFactory("A")); + newA->SetFactory("Importer", manager.GetFactory("Importer")); + manager.SetFactory("A", newA); + + // Rebalanced P + auto newP = rcp(new RebalanceTransferFactory()); ParameterList newPparams; - newPparams.set("matrix key", "P"); - newP-> SetParameterList(newPparams); - newP-> SetFactory("P", manager.GetFactory("P")); + newPparams.set("type", "Interpolation"); + if (changedPRrebalance_) + newPparams.set("repartition: rebalance P and R", this->doPRrebalance_); + if (changedPRViaCopyrebalance_) + newPparams.set("repartition: explicit via new copy rebalance P and R", true); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"repartition: use subcommunicators", bool, newPparams); + newP->SetParameterList(newPparams); + newP->SetFactory("Importer", manager.GetFactory("Importer")); + newP->SetFactory("P", manager.GetFactory("P")); + if (!paramList.isParameter("semicoarsen: number of levels")) + newP->SetFactory("Nullspace", manager.GetFactory("Ptent")); + else + newP->SetFactory("Nullspace", manager.GetFactory("P")); // TogglePFactory + if (useCoordinates_) + newP->SetFactory("Coordinates", manager.GetFactory("Coordinates")); manager.SetFactory("P", newP); + if (useCoordinates_) + manager.SetFactory("Coordinates", newP); + if (useBlockNumber_ && (levelID > 0)) { + newP->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); + manager.SetFactory("BlockNumber", newP); + } + // Rebalanced R + auto newR = rcp(new RebalanceTransferFactory()); + ParameterList newRparams; + newRparams.set("type", "Restriction"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newRparams); + if (changedPRrebalance_) + newRparams.set("repartition: rebalance P and R", this->doPRrebalance_); + if (changedPRViaCopyrebalance_) + newPparams.set("repartition: explicit via new copy rebalance P and R", true); + if (changedImplicitTranspose_) + newRparams.set("transpose: use implicit", this->implicitTranspose_); + newR->SetParameterList(newRparams); + newR->SetFactory("Importer", manager.GetFactory("Importer")); if (!this->implicitTranspose_) { - // Low precision R - auto newR = rcp(new LowPrecisionFactory()); - ParameterList newRparams; - newRparams.set("matrix key", "R"); - newR-> SetParameterList(newRparams); - newR-> SetFactory("R", manager.GetFactory("R")); + newR->SetFactory("R", manager.GetFactory("R")); manager.SetFactory("R", newR); } - } - } - - // ===================================================================================================== - // =========================================== Nullspace =============================================== 
- // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Nullspace(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */, RCP & nullSpaceFactory) const - { - // Nullspace - MUELU_KOKKOS_FACTORY(nullSpace, NullspaceFactory, NullspaceFactory_kokkos); - - bool have_userNS = false; - if (paramList.isParameter("Nullspace") && !paramList.get >("Nullspace").is_null()) - have_userNS = true; - if (!have_userNS) { + // NOTE: the role of NullspaceFactory is to provide nullspace on the finest + // level if a user does not do that. For all other levels it simply passes + // nullspace from a real factory to whoever needs it. If we don't use + // repartitioning, that factory is "TentativePFactory"; if we do, it is + // "RebalanceTransferFactory". But we still have to have NullspaceFactory as + // the "Nullspace" of the manager + // NOTE: This really needs to be set on the *NullSpaceFactory*, not manager.get("Nullspace"). 
ParameterList newNullparams; MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); - nullSpace->SetParameterList(newNullparams); - nullSpace->SetFactory("Nullspace", manager.GetFactory("Ptent")); - manager.SetFactory("Nullspace", nullSpace); + nullSpaceFactory->SetFactory("Nullspace", newP); + nullSpaceFactory->SetParameterList(newNullparams); } - nullSpaceFactory = nullSpace; - - if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { - RCP scaledNSfactory = rcp(new ScaledNullspaceFactory()); - scaledNSfactory->SetFactory("Nullspace",nullSpaceFactory); - manager.SetFactory("Scaled Nullspace",scaledNSfactory); - } - +#else + paramList.set("repartition: enable", false); +#ifndef HAVE_MPI + this->GetOStream(Warnings0) << "No repartitioning available for a serial run\n"; +#else + this->GetOStream(Warnings0) << "Zoltan/Zoltan2 are unavailable for repartitioning\n"; +#endif // HAVE_MPI +#endif // defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) } +} + +// ===================================================================================================== +// ========================================= Low precision transfers =================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "transfers: half precision", bool, enableLowPrecision); + + if (enableLowPrecision) { + // Low precision P + auto newP = rcp(new LowPrecisionFactory()); + ParameterList newPparams; + newPparams.set("matrix key", "P"); + newP->SetParameterList(newPparams); + newP->SetFactory("P", manager.GetFactory("P")); + 
manager.SetFactory("P", newP); - // ===================================================================================================== - // ================================= Algorithm: SemiCoarsening ========================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_SemiCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - // === Semi-coarsening === - RCP semicoarsenFactory = Teuchos::null; - if (paramList.isParameter("semicoarsen: number of levels") && - paramList.get("semicoarsen: number of levels") > 0) { - - ParameterList togglePParams; - ParameterList semicoarsenPParams; - ParameterList linedetectionParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: number of levels", int, togglePParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: coarsen rate", int, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise constant", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise linear", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: calculate nonsym restriction", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: orientation", std::string, linedetectionParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: num layers", int, linedetectionParams); - - MUELU_KOKKOS_FACTORY_NO_DECL(semicoarsenFactory, SemiCoarsenPFactory, SemiCoarsenPFactory_kokkos); - RCP linedetectionFactory = rcp(new LineDetectionFactory()); - RCP togglePFactory = rcp(new TogglePFactory()); - - linedetectionFactory->SetParameterList(linedetectionParams); - semicoarsenFactory 
->SetParameterList(semicoarsenPParams); - togglePFactory ->SetParameterList(togglePParams); - - togglePFactory->AddCoarseNullspaceFactory (semicoarsenFactory); - togglePFactory->AddProlongatorFactory (semicoarsenFactory); - togglePFactory->AddPtentFactory (semicoarsenFactory); - togglePFactory->AddCoarseNullspaceFactory (manager.GetFactory("Ptent")); - togglePFactory->AddProlongatorFactory (manager.GetFactory("P")); - togglePFactory->AddPtentFactory (manager.GetFactory("Ptent")); - - manager.SetFactory("CoarseNumZLayers", linedetectionFactory); - manager.SetFactory("LineDetection_Layers", linedetectionFactory); - manager.SetFactory("LineDetection_VertLineIds", linedetectionFactory); - - manager.SetFactory("P", togglePFactory); - manager.SetFactory("Ptent", togglePFactory); - manager.SetFactory("Nullspace", togglePFactory); + if (!this->implicitTranspose_) { + // Low precision R + auto newR = rcp(new LowPrecisionFactory()); + ParameterList newRparams; + newRparams.set("matrix key", "R"); + newR->SetParameterList(newRparams); + newR->SetFactory("R", manager.GetFactory("R")); + manager.SetFactory("R", newR); } + } +} + +// ===================================================================================================== +// =========================================== Nullspace =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Nullspace(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */, RCP& nullSpaceFactory) const { + // Nullspace + MUELU_KOKKOS_FACTORY(nullSpace, NullspaceFactory, NullspaceFactory_kokkos); + + bool have_userNS = false; + if (paramList.isParameter("Nullspace") && !paramList.get >("Nullspace").is_null()) + have_userNS = true; + + if (!have_userNS) { + ParameterList newNullparams; + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); + nullSpace->SetParameterList(newNullparams); + nullSpace->SetFactory("Nullspace", manager.GetFactory("Ptent")); + manager.SetFactory("Nullspace", nullSpace); + } + nullSpaceFactory = nullSpace; - if (paramList.isParameter("semicoarsen: number of levels")) { - auto tf = rcp(new ToggleCoordinatesTransferFactory()); - tf->SetFactory("Chosen P", manager.GetFactory("P")); - tf->AddCoordTransferFactory(semicoarsenFactory); - - RCP coords = rcp(new CoordinatesTransferFactory()); - coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - tf->AddCoordTransferFactory(coords); - manager.SetFactory("Coordinates", tf); - } + if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { + RCP scaledNSfactory = rcp(new ScaledNullspaceFactory()); + scaledNSfactory->SetFactory("Nullspace", nullSpaceFactory); + manager.SetFactory("Scaled Nullspace", scaledNSfactory); + } +} + +// ===================================================================================================== +// ================================= Algorithm: SemiCoarsening ========================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_SemiCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { + // === Semi-coarsening === + RCP semicoarsenFactory = Teuchos::null; + if (paramList.isParameter("semicoarsen: number of levels") && + paramList.get("semicoarsen: number of levels") > 0) { + ParameterList togglePParams; + ParameterList semicoarsenPParams; + ParameterList linedetectionParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"semicoarsen: number of levels", int, togglePParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: coarsen rate", int, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise constant", bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise linear", bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: calculate nonsym restriction", bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: orientation", std::string, linedetectionParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: num layers", int, linedetectionParams); + + MUELU_KOKKOS_FACTORY_NO_DECL(semicoarsenFactory, SemiCoarsenPFactory, SemiCoarsenPFactory_kokkos); + RCP linedetectionFactory = rcp(new LineDetectionFactory()); + RCP togglePFactory = rcp(new TogglePFactory()); + + linedetectionFactory->SetParameterList(linedetectionParams); + semicoarsenFactory->SetParameterList(semicoarsenPParams); + togglePFactory->SetParameterList(togglePParams); + + togglePFactory->AddCoarseNullspaceFactory(semicoarsenFactory); + togglePFactory->AddProlongatorFactory(semicoarsenFactory); + togglePFactory->AddPtentFactory(semicoarsenFactory); + togglePFactory->AddCoarseNullspaceFactory(manager.GetFactory("Ptent")); + togglePFactory->AddProlongatorFactory(manager.GetFactory("P")); + togglePFactory->AddPtentFactory(manager.GetFactory("Ptent")); + + manager.SetFactory("CoarseNumZLayers", linedetectionFactory); + manager.SetFactory("LineDetection_Layers", linedetectionFactory); + manager.SetFactory("LineDetection_VertLineIds", linedetectionFactory); + + manager.SetFactory("P", togglePFactory); + manager.SetFactory("Ptent", togglePFactory); + manager.SetFactory("Nullspace", togglePFactory); } + if (paramList.isParameter("semicoarsen: number of levels")) { + auto tf = rcp(new 
ToggleCoordinatesTransferFactory()); + tf->SetFactory("Chosen P", manager.GetFactory("P")); + tf->AddCoordTransferFactory(semicoarsenFactory); - // ===================================================================================================== - // ================================== Algorithm: P-Coarsening ========================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_PCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { + RCP coords = rcp(new CoordinatesTransferFactory()); + coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + tf->AddCoordTransferFactory(coords); + manager.SetFactory("Coordinates", tf); + } +} + +// ===================================================================================================== +// ================================== Algorithm: P-Coarsening ========================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_PCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { #ifdef HAVE_MUELU_INTREPID2 - // This only makes sense to invoke from the default list. 
- if (defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList,"pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID >= (int)pcoarsen_schedule.size()) { - // Past the p-coarsening levels, we do Smoothed Aggregation - // NOTE: We should probably consider allowing other options past p-coarsening - UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - - } else { - // P-Coarsening - ParameterList Pparams; - auto P = rcp(new IntrepidPCoarsenFactory()); - std::string lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - std::string hi = (levelID ? pcoarsen_element + std::to_string(pcoarsen_schedule[levelID-1]) : lo); - Pparams.set("pcoarsen: hi basis", hi); - Pparams.set("pcoarsen: lo basis", lo); - P->SetParameterList(Pparams); - manager.SetFactory("P", P); - - // Add special nullspace handling - rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); - } + // This only makes sense to invoke from the default list. 
+ if (defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID >= (int)pcoarsen_schedule.size()) { + // Past the p-coarsening levels, we do Smoothed Aggregation + // NOTE: We should probably consider allowing other options past p-coarsening + UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); } else { - // P-Coarsening by manual specification (old interface) + // P-Coarsening ParameterList Pparams; - auto P = rcp(new IntrepidPCoarsenFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: hi basis", std::string, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: lo basis", std::string, Pparams); + auto P = rcp(new IntrepidPCoarsenFactory()); + std::string lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + std::string hi = (levelID ? 
pcoarsen_element + std::to_string(pcoarsen_schedule[levelID - 1]) : lo); + Pparams.set("pcoarsen: hi basis", hi); + Pparams.set("pcoarsen: lo basis", lo); P->SetParameterList(Pparams); manager.SetFactory("P", P); @@ -1968,225 +1919,230 @@ namespace MueLu { rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); } -#endif - } - - // ===================================================================================================== - // ============================== Algorithm: Smoothed Aggregation ====================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_SA(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { - // Smoothed aggregation - MUELU_KOKKOS_FACTORY(P, SaPFactory, SaPFactory_kokkos); + } else { + // P-Coarsening by manual specification (old interface) ParameterList Pparams; - if (paramList.isSublist("matrixmatrix: kernel params")) - Pparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - Pparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: damping factor", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: calculate eigenvalue estimate", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: max eigenvalue", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigenvalue estimate num iterations", int, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: use rowsumabs diagonal scaling", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "sa: rowsumabs diagonal replacement tolerance", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement value", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs use automatic diagonal tolerance", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: enforce constraints", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigen-analysis type", std::string, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, Pparams); - + auto P = rcp(new IntrepidPCoarsenFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: hi basis", std::string, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: lo basis", std::string, Pparams); P->SetParameterList(Pparams); - - - // Filtering - MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); - if (useFiltering) { - // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the - // dependency tree is setup. The Kokkos version has merged the the - // FilteredAFactory into the CoalesceDropFactory. 
- if (!useKokkos_) { - RCP filterFactory = rcp(new FilteredAFactory()); - - ParameterList fParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); - filterFactory->SetParameterList(fParams); - filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); - filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above - filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); - - P->SetFactory("A", filterFactory); - - } else { - P->SetFactory("A", manager.GetFactory("Graph")); - } - } - - P->SetFactory("P", manager.GetFactory("Ptent")); manager.SetFactory("P", P); - bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - if (reuseType == "tP" && !filteringChangesMatrix) - keeps.push_back(keep_pair("AP reuse data", P.get())); + // Add special nullspace handling + rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); } - // ===================================================================================================== - // =============================== Algorithm: Energy Minimization ====================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Emin(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: pattern", std::string, patternType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - TEUCHOS_TEST_FOR_EXCEPTION(patternType != "AkPtent", Exceptions::InvalidArgument, - "Invalid pattern name: \"" << patternType << "\". 
Valid options: \"AkPtent\""); - // Pattern - auto patternFactory = rcp(new PatternFactory()); - ParameterList patternParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: pattern order", int, patternParams); - patternFactory->SetParameterList(patternParams); - patternFactory->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("Ppattern", patternFactory); - - // Constraint - auto constraintFactory = rcp(new ConstraintFactory()); - constraintFactory->SetFactory("Ppattern", manager.GetFactory("Ppattern")); - constraintFactory->SetFactory("CoarseNullspace", manager.GetFactory("Ptent")); - manager.SetFactory("Constraint", constraintFactory); - - // Emin Factory - auto P = rcp(new EminPFactory()); - // Filtering - MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: use filtered matrix", bool, useFiltering); - if(useFiltering) { - // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the - // dependency tree is setup. The Kokkos version has merged the the - // FilteredAFactory into the CoalesceDropFactory. 
- if (!useKokkos_) { - RCP filterFactory = rcp(new FilteredAFactory()); - - ParameterList fParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); - filterFactory->SetParameterList(fParams); - filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); - filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above - filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); - - P->SetFactory("A", filterFactory); - - } else { - P->SetFactory("A", manager.GetFactory("Graph")); - } - } +#endif +} + +// ===================================================================================================== +// ============================== Algorithm: Smoothed Aggregation ====================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_SA(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { + // Smoothed aggregation + MUELU_KOKKOS_FACTORY(P, SaPFactory, SaPFactory_kokkos); + ParameterList Pparams; + if (paramList.isSublist("matrixmatrix: kernel params")) + Pparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + Pparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: damping factor", double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: calculate eigenvalue estimate", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: max eigenvalue", double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigenvalue estimate num iterations", int, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: use rowsumabs diagonal scaling", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement tolerance", double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement value", double, Pparams); + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs use automatic diagonal tolerance", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: enforce constraints", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigen-analysis type", std::string, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, Pparams); + + P->SetParameterList(Pparams); + + // Filtering + MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); + if (useFiltering) { + // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the + // dependency tree is setup. The Kokkos version has merged the the + // FilteredAFactory into the CoalesceDropFactory. + if (!useKokkos_) { + RCP filterFactory = rcp(new FilteredAFactory()); + + ParameterList fParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); + filterFactory->SetParameterList(fParams); + filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); + filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + 
filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + // I'm not sure why we need this line. See comments for DofsPerNode for UncoupledAggregation above + filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); + + P->SetFactory("A", filterFactory); - // Energy minimization - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num iterations", int, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: iterative method", std::string, Pparams); - if (reuseType == "emin") { - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num reuse iterations", int, Pparams); - Pparams.set("Keep P0", true); - Pparams.set("Keep Constraint0", true); + } else { + P->SetFactory("A", manager.GetFactory("Graph")); } - P->SetParameterList(Pparams); - P->SetFactory("P", manager.GetFactory("Ptent")); - P->SetFactory("Constraint", manager.GetFactory("Constraint")); - manager.SetFactory("P", P); - } - - // ===================================================================================================== - // ================================= Algorithm: Petrov-Galerkin ======================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_PG(ParameterList& /* paramList */, const ParameterList& /* defaultList */, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - TEUCHOS_TEST_FOR_EXCEPTION(this->implicitTranspose_, Exceptions::RuntimeError, - "Implicit transpose not supported with Petrov-Galerkin smoothed transfer operators: Set \"transpose: use implicit\" to false!\n" \ - "Petrov-Galerkin transfer operator smoothing for non-symmetric problems requires a separate handling of the restriction operator which " \ - "does not allow the usage of implicit transpose easily."); - - // Petrov-Galerkin - auto P 
= rcp(new PgPFactory()); - P->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("P", P); } - // ===================================================================================================== - // ================================= Algorithm: Replicate ======================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Replicate(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const - { - auto P = rcp(new MueLu::ReplicatePFactory()); - - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "replicate: npdes", int, Pparams); - - P->SetParameterList(Pparams); - manager.SetFactory("P", P); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); + + bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + if (reuseType == "tP" && !filteringChangesMatrix) + keeps.push_back(keep_pair("AP reuse data", P.get())); +} + +// ===================================================================================================== +// =============================== Algorithm: Energy Minimization ====================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Emin(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: pattern", std::string, patternType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + 
TEUCHOS_TEST_FOR_EXCEPTION(patternType != "AkPtent", Exceptions::InvalidArgument, + "Invalid pattern name: \"" << patternType << "\". Valid options: \"AkPtent\""); + // Pattern + auto patternFactory = rcp(new PatternFactory()); + ParameterList patternParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: pattern order", int, patternParams); + patternFactory->SetParameterList(patternParams); + patternFactory->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("Ppattern", patternFactory); + + // Constraint + auto constraintFactory = rcp(new ConstraintFactory()); + constraintFactory->SetFactory("Ppattern", manager.GetFactory("Ppattern")); + constraintFactory->SetFactory("CoarseNullspace", manager.GetFactory("Ptent")); + manager.SetFactory("Constraint", constraintFactory); + + // Emin Factory + auto P = rcp(new EminPFactory()); + // Filtering + MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: use filtered matrix", bool, useFiltering); + if (useFiltering) { + // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the + // dependency tree is setup. The Kokkos version has merged the the + // FilteredAFactory into the CoalesceDropFactory. 
+ if (!useKokkos_) { + RCP filterFactory = rcp(new FilteredAFactory()); + + ParameterList fParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); + filterFactory->SetParameterList(fParams); + filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); + filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above + filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); + + P->SetFactory("A", filterFactory); + } else { + P->SetFactory("A", manager.GetFactory("Graph")); + } } - // ===================================================================================================== - // ====================================== Algorithm: Combine ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Combine(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const - { - auto P = rcp(new MueLu::CombinePFactory()); - - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "combine: numBlks", int, Pparams); - - P->SetParameterList(Pparams); - manager.SetFactory("P", P); - + // Energy minimization + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num iterations", int, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: iterative method", std::string, Pparams); + if (reuseType == "emin") { + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num reuse iterations", int, Pparams); + Pparams.set("Keep P0", true); + Pparams.set("Keep Constraint0", true); } - - - // ===================================================================================================== - // ====================================== Algorithm: Matlab ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Matlab(ParameterList& paramList, const ParameterList& /* defaultList */, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) 
const { + P->SetParameterList(Pparams); + P->SetFactory("P", manager.GetFactory("Ptent")); + P->SetFactory("Constraint", manager.GetFactory("Constraint")); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ================================= Algorithm: Petrov-Galerkin ======================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_PG(ParameterList& /* paramList */, const ParameterList& /* defaultList */, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { + TEUCHOS_TEST_FOR_EXCEPTION(this->implicitTranspose_, Exceptions::RuntimeError, + "Implicit transpose not supported with Petrov-Galerkin smoothed transfer operators: Set \"transpose: use implicit\" to false!\n" + "Petrov-Galerkin transfer operator smoothing for non-symmetric problems requires a separate handling of the restriction operator which " + "does not allow the usage of implicit transpose easily."); + + // Petrov-Galerkin + auto P = rcp(new PgPFactory()); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ================================= Algorithm: Replicate ======================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Replicate(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { + auto P = rcp(new MueLu::ReplicatePFactory()); + + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "replicate: npdes", int, Pparams); + + 
P->SetParameterList(Pparams); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ====================================== Algorithm: Combine ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Combine(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { + auto P = rcp(new MueLu::CombinePFactory()); + + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "combine: numBlks", int, Pparams); + + P->SetParameterList(Pparams); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ====================================== Algorithm: Matlab ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Matlab(ParameterList& paramList, const ParameterList& /* defaultList */, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { #ifdef HAVE_MUELU_MATLAB - ParameterList Pparams = paramList.sublist("transfer: params"); - auto P = rcp(new TwoLevelMatlabFactory()); - P->SetParameterList(Pparams); - P->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("P", P); + ParameterList Pparams = paramList.sublist("transfer: params"); + auto P = rcp(new TwoLevelMatlabFactory()); + P->SetParameterList(Pparams); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); #else - (void)paramList; - (void)manager; + (void)paramList; + (void)manager; #endif - } +} #undef MUELU_SET_VAR_2LIST #undef MUELU_TEST_AND_SET_VAR @@ -2194,546 
+2150,550 @@ namespace MueLu { #undef MUELU_TEST_PARAM_2LIST #undef MUELU_KOKKOS_FACTORY - size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t); - - template - void ParameterListInterpreter::Validate(const ParameterList& constParamList) const { - ParameterList paramList = constParamList; - const ParameterList& validList = *MasterList::List(); - // Validate up to maxLevels level specific parameter sublists - const int maxLevels = 100; - - // Extract level specific list - std::vector paramLists; - for (int levelID = 0; levelID < maxLevels; levelID++) { - std::string sublistName = "level " + toString(levelID); - if (paramList.isSublist(sublistName)) { - paramLists.push_back(paramList.sublist(sublistName)); - // paramLists.back().setName(sublistName); - paramList.remove(sublistName); - } +size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t); + +template +void ParameterListInterpreter::Validate(const ParameterList& constParamList) const { + ParameterList paramList = constParamList; + const ParameterList& validList = *MasterList::List(); + // Validate up to maxLevels level specific parameter sublists + const int maxLevels = 100; + + // Extract level specific list + std::vector paramLists; + for (int levelID = 0; levelID < maxLevels; levelID++) { + std::string sublistName = "level " + toString(levelID); + if (paramList.isSublist(sublistName)) { + paramLists.push_back(paramList.sublist(sublistName)); + // paramLists.back().setName(sublistName); + paramList.remove(sublistName); } - paramLists.push_back(paramList); - // paramLists.back().setName("main"); + } + paramLists.push_back(paramList); + // paramLists.back().setName("main"); #ifdef HAVE_MUELU_MATLAB - // If Muemex is supported, hide custom level variables from validator by removing them from paramList's sublists - for (size_t i = 0; i < paramLists.size(); i++) { - std::vector customVars; // list of names (keys) to be removed from list - - 
for(Teuchos::ParameterList::ConstIterator it = paramLists[i].begin(); it != paramLists[i].end(); it++) { - std::string paramName = paramLists[i].name(it); + // If Muemex is supported, hide custom level variables from validator by removing them from paramList's sublists + for (size_t i = 0; i < paramLists.size(); i++) { + std::vector customVars; // list of names (keys) to be removed from list - if (IsParamMuemexVariable(paramName)) - customVars.push_back(paramName); - } + for (Teuchos::ParameterList::ConstIterator it = paramLists[i].begin(); it != paramLists[i].end(); it++) { + std::string paramName = paramLists[i].name(it); - // Remove the keys - for (size_t j = 0; j < customVars.size(); j++) - paramLists[i].remove(customVars[j], false); + if (IsParamMuemexVariable(paramName)) + customVars.push_back(paramName); } + + // Remove the keys + for (size_t j = 0; j < customVars.size(); j++) + paramLists[i].remove(customVars[j], false); + } #endif - const int maxDepth = 0; - for (size_t i = 0; i < paramLists.size(); i++) { - // validate every sublist - try { - paramLists[i].validateParameters(validList, maxDepth); - - } catch (const Teuchos::Exceptions::InvalidParameterName& e) { - std::string eString = e.what(); - - // Parse name from: - size_t nameStart = eString.find_first_of('"') + 1; - size_t nameEnd = eString.find_first_of('"', nameStart); - std::string name = eString.substr(nameStart, nameEnd - nameStart); - - size_t bestScore = 100; - std::string bestName = ""; - for (ParameterList::ConstIterator it = validList.begin(); it != validList.end(); it++) { - const std::string& pName = validList.name(it); - this->GetOStream(Runtime1) << "| " << pName; - size_t score = LevenshteinDistance(name.c_str(), name.length(), pName.c_str(), pName.length()); - this->GetOStream(Runtime1) << " -> " << score << std::endl; - if (score < bestScore) { - bestScore = score; - bestName = pName; - } + const int maxDepth = 0; + for (size_t i = 0; i < paramLists.size(); i++) { + // validate 
every sublist + try { + paramLists[i].validateParameters(validList, maxDepth); + + } catch (const Teuchos::Exceptions::InvalidParameterName& e) { + std::string eString = e.what(); + + // Parse name from: + size_t nameStart = eString.find_first_of('"') + 1; + size_t nameEnd = eString.find_first_of('"', nameStart); + std::string name = eString.substr(nameStart, nameEnd - nameStart); + + size_t bestScore = 100; + std::string bestName = ""; + for (ParameterList::ConstIterator it = validList.begin(); it != validList.end(); it++) { + const std::string& pName = validList.name(it); + this->GetOStream(Runtime1) << "| " << pName; + size_t score = LevenshteinDistance(name.c_str(), name.length(), pName.c_str(), pName.length()); + this->GetOStream(Runtime1) << " -> " << score << std::endl; + if (score < bestScore) { + bestScore = score; + bestName = pName; } - if (bestScore < 10 && bestName != "") { - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, - eString << "The parameter name \"" + name + "\" is not valid. Did you mean \"" + bestName << "\"?\n"); + } + if (bestScore < 10 && bestName != "") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, + eString << "The parameter name \"" + name + "\" is not valid. 
Did you mean \"" + bestName << "\"?\n"); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, - eString << "The parameter name \"" + name + "\" is not valid.\n"); - } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, + eString << "The parameter name \"" + name + "\" is not valid.\n"); } } } +} + +// ===================================================================================================== +// ==================================== FACTORY interpreter ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + SetFactoryParameterList(const ParameterList& constParamList) { + // Create a non const copy of the parameter list + // Working with a modifiable list is much much easier than with original one + ParameterList paramList = constParamList; + + // Parameter List Parsing: + // --------- + // + // + // + if (paramList.isSublist("Matrix")) { + blockSize_ = paramList.sublist("Matrix").get("PDE equations", MasterList::getDefault("number of equations")); + dofOffset_ = paramList.sublist("Matrix").get("DOF offset", 0); // undocumented parameter allowing to define a DOF offset of the global dofs of an operator (defaul = 0) + } - // ===================================================================================================== - // ==================================== FACTORY interpreter ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - SetFactoryParameterList(const ParameterList& constParamList) { - // Create a non const copy of the parameter list - // Working with a modifiable list is much much easier than with original one - ParameterList paramList = constParamList; - - // Parameter List Parsing: - // 
--------- - // - // - // - if (paramList.isSublist("Matrix")) { - blockSize_ = paramList.sublist("Matrix").get("PDE equations", MasterList::getDefault("number of equations")); - dofOffset_ = paramList.sublist("Matrix").get("DOF offset", 0); // undocumented parameter allowing to define a DOF offset of the global dofs of an operator (defaul = 0) + // create new FactoryFactory object if necessary + if (factFact_ == Teuchos::null) + factFact_ = Teuchos::rcp(new FactoryFactory()); + + // Parameter List Parsing: + // --------- + // + // <== call BuildFactoryMap() on this parameter list + // ... + // + // + FactoryMap factoryMap; + FactoryManagerMap factoryManagers; + if (paramList.isSublist("Factories")) + this->BuildFactoryMap(paramList.sublist("Factories"), factoryMap, factoryMap, factoryManagers); + + // Parameter List Parsing: + // --------- + // + // + // <== get + // <== get + // + // <== parse first args and call BuildFactoryMap() on the rest of this parameter list + // ... + // + // + // + if (paramList.isSublist("Hierarchy")) { + ParameterList hieraList = paramList.sublist("Hierarchy"); // copy because list temporally modified (remove 'id') + + // Get hierarchy options + if (hieraList.isParameter("max levels")) { + this->numDesiredLevel_ = hieraList.get("max levels"); + hieraList.remove("max levels"); } - // create new FactoryFactory object if necessary - if (factFact_ == Teuchos::null) - factFact_ = Teuchos::rcp(new FactoryFactory()); - - // Parameter List Parsing: - // --------- - // - // <== call BuildFactoryMap() on this parameter list - // ... - // - // - FactoryMap factoryMap; - FactoryManagerMap factoryManagers; - if (paramList.isSublist("Factories")) - this->BuildFactoryMap(paramList.sublist("Factories"), factoryMap, factoryMap, factoryManagers); - - // Parameter List Parsing: - // --------- - // - // - // <== get - // <== get - // - // <== parse first args and call BuildFactoryMap() on the rest of this parameter list - // ... 
- // - // - // - if (paramList.isSublist("Hierarchy")) { - ParameterList hieraList = paramList.sublist("Hierarchy"); // copy because list temporally modified (remove 'id') - - // Get hierarchy options - if (hieraList.isParameter("max levels")) { - this->numDesiredLevel_ = hieraList.get("max levels"); - hieraList.remove("max levels"); - } + if (hieraList.isParameter("coarse: max size")) { + this->maxCoarseSize_ = hieraList.get("coarse: max size"); + hieraList.remove("coarse: max size"); + } - if (hieraList.isParameter("coarse: max size")) { - this->maxCoarseSize_ = hieraList.get("coarse: max size"); - hieraList.remove("coarse: max size"); - } + if (hieraList.isParameter("repartition: rebalance P and R")) { + this->doPRrebalance_ = hieraList.get("repartition: rebalance P and R"); + hieraList.remove("repartition: rebalance P and R"); + } - if (hieraList.isParameter("repartition: rebalance P and R")) { - this->doPRrebalance_ = hieraList.get("repartition: rebalance P and R"); - hieraList.remove("repartition: rebalance P and R"); - } + if (hieraList.isParameter("transpose: use implicit")) { + this->implicitTranspose_ = hieraList.get("transpose: use implicit"); + hieraList.remove("transpose: use implicit"); + } - if (hieraList.isParameter("transpose: use implicit")) { - this->implicitTranspose_ = hieraList.get("transpose: use implicit"); - hieraList.remove("transpose: use implicit"); - } + if (hieraList.isParameter("fuse prolongation and update")) { + this->fuseProlongationAndUpdate_ = hieraList.get("fuse prolongation and update"); + hieraList.remove("fuse prolongation and update"); + } - if (hieraList.isParameter("fuse prolongation and update")) { - this->fuseProlongationAndUpdate_ = hieraList.get("fuse prolongation and update"); - hieraList.remove("fuse prolongation and update"); - } + if (hieraList.isParameter("nullspace: suppress dimension check")) { + this->suppressNullspaceDimensionCheck_ = hieraList.get("nullspace: suppress dimension check"); + 
hieraList.remove("nullspace: suppress dimension check"); + } - if (hieraList.isParameter("nullspace: suppress dimension check")) { - this->suppressNullspaceDimensionCheck_ = hieraList.get("nullspace: suppress dimension check"); - hieraList.remove("nullspace: suppress dimension check"); - } + if (hieraList.isParameter("number of vectors")) { + this->numDesiredLevel_ = hieraList.get("number of vectors"); + hieraList.remove("number of vectors"); + } - if (hieraList.isParameter("number of vectors")) { - this->numDesiredLevel_ = hieraList.get("number of vectors"); - hieraList.remove("number of vectors"); - } + if (hieraList.isSublist("matvec params")) + this->matvecParams_ = Teuchos::parameterList(hieraList.sublist("matvec params")); - if (hieraList.isSublist("matvec params")) - this->matvecParams_ = Teuchos::parameterList(hieraList.sublist("matvec params")); + if (hieraList.isParameter("coarse grid correction scaling factor")) { + this->scalingFactor_ = hieraList.get("coarse grid correction scaling factor"); + hieraList.remove("coarse grid correction scaling factor"); + } + // Translate cycle type parameter + if (hieraList.isParameter("cycle type")) { + std::map cycleMap; + cycleMap["V"] = VCYCLE; + cycleMap["W"] = WCYCLE; - if (hieraList.isParameter("coarse grid correction scaling factor")) { - this->scalingFactor_ = hieraList.get("coarse grid correction scaling factor"); - hieraList.remove("coarse grid correction scaling factor"); - } + std::string cycleType = hieraList.get("cycle type"); + TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, "Invalid cycle type: \"" << cycleType << "\""); + this->Cycle_ = cycleMap[cycleType]; + } - // Translate cycle type parameter - if (hieraList.isParameter("cycle type")) { - std::map cycleMap; - cycleMap["V"] = VCYCLE; - cycleMap["W"] = WCYCLE; + if (hieraList.isParameter("W cycle start level")) { + this->WCycleStartLevel_ = hieraList.get("W cycle start level"); + } - std::string cycleType = 
hieraList.get("cycle type"); - TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, "Invalid cycle type: \"" << cycleType << "\""); - this->Cycle_ = cycleMap[cycleType]; - } + if (hieraList.isParameter("verbosity")) { + std::string vl = hieraList.get("verbosity"); + hieraList.remove("verbosity"); + this->verbosity_ = toVerbLevel(vl); + } - if (hieraList.isParameter("W cycle start level")) { - this->WCycleStartLevel_ = hieraList.get("W cycle start level"); - } + if (hieraList.isParameter("output filename")) + VerboseObject::SetMueLuOFileStream(hieraList.get("output filename")); + + if (hieraList.isParameter("dependencyOutputLevel")) + this->graphOutputLevel_ = hieraList.get("dependencyOutputLevel"); + + // Check for the reuse case + if (hieraList.isParameter("reuse")) + Factory::DisableMultipleCheckGlobally(); + + if (hieraList.isSublist("DataToWrite")) { + // TODO We should be able to specify any data. If it exists, write it. + // TODO This would requires something like std::set > + ParameterList foo = hieraList.sublist("DataToWrite"); + std::string dataName = "Matrices"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["A"] = Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "Prolongators"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["P"] = Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "Restrictors"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["R"] = Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "D0"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["D0"] = Teuchos::getArrayFromStringParameter(foo, dataName); + } - if (hieraList.isParameter("verbosity")) { - std::string vl = hieraList.get("verbosity"); - hieraList.remove("verbosity"); - this->verbosity_ = toVerbLevel(vl); - } + // Get level configuration + for (ParameterList::ConstIterator param = hieraList.begin(); param != hieraList.end(); ++param) { + const std::string& paramName 
= hieraList.name(param); - if (hieraList.isParameter("output filename")) - VerboseObject::SetMueLuOFileStream(hieraList.get("output filename")); - - if (hieraList.isParameter("dependencyOutputLevel")) - this->graphOutputLevel_ = hieraList.get("dependencyOutputLevel"); - - // Check for the reuse case - if (hieraList.isParameter("reuse")) - Factory::DisableMultipleCheckGlobally(); - - if (hieraList.isSublist("DataToWrite")) { - //TODO We should be able to specify any data. If it exists, write it. - //TODO This would requires something like std::set > - ParameterList foo = hieraList.sublist("DataToWrite"); - std::string dataName = "Matrices"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["A"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "Prolongators"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["P"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "Restrictors"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["R"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "D0"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["D0"] = Teuchos::getArrayFromStringParameter(foo, dataName); - } + if (paramName != "DataToWrite" && hieraList.isSublist(paramName)) { + ParameterList levelList = hieraList.sublist(paramName); // copy because list temporally modified (remove 'id') - // Get level configuration - for (ParameterList::ConstIterator param = hieraList.begin(); param != hieraList.end(); ++param) { - const std::string & paramName = hieraList.name(param); - - if (paramName != "DataToWrite" && hieraList.isSublist(paramName)) { - ParameterList levelList = hieraList.sublist(paramName); // copy because list temporally modified (remove 'id') - - int startLevel = 0; if(levelList.isParameter("startLevel")) { startLevel = levelList.get("startLevel"); levelList.remove("startLevel"); } - int numDesiredLevel = 1; if(levelList.isParameter("numDesiredLevel")) { numDesiredLevel = 
levelList.get("numDesiredLevel"); levelList.remove("numDesiredLevel"); } - - // Parameter List Parsing: - // --------- - // - // - // - // - // - // [] <== call BuildFactoryMap() on the rest of the parameter list - // - // - FactoryMap levelFactoryMap; - BuildFactoryMap(levelList, factoryMap, levelFactoryMap, factoryManagers); - - RCP m = rcp(new FactoryManager(levelFactoryMap)); - if (hieraList.isParameter("use kokkos refactor")) - m->SetKokkosRefactor(hieraList.get("use kokkos refactor")); - - if (startLevel >= 0) - this->AddFactoryManager(startLevel, numDesiredLevel, m); - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::ParameterListInterpreter():: invalid level id"); - } /* TODO: else { } */ - } + int startLevel = 0; + if (levelList.isParameter("startLevel")) { + startLevel = levelList.get("startLevel"); + levelList.remove("startLevel"); + } + int numDesiredLevel = 1; + if (levelList.isParameter("numDesiredLevel")) { + numDesiredLevel = levelList.get("numDesiredLevel"); + levelList.remove("numDesiredLevel"); + } + + // Parameter List Parsing: + // --------- + // + // + // + // + // + // [] <== call BuildFactoryMap() on the rest of the parameter list + // + // + FactoryMap levelFactoryMap; + BuildFactoryMap(levelList, factoryMap, levelFactoryMap, factoryManagers); + + RCP m = rcp(new FactoryManager(levelFactoryMap)); + if (hieraList.isParameter("use kokkos refactor")) + m->SetKokkosRefactor(hieraList.get("use kokkos refactor")); + + if (startLevel >= 0) + this->AddFactoryManager(startLevel, numDesiredLevel, m); + else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::ParameterListInterpreter():: invalid level id"); + } /* TODO: else { } */ } } +} + +// TODO: static? +/// \brief Interpret "Factories" sublist +/// +/// \param paramList [in]: "Factories" ParameterList +/// \param factoryMapIn [in]: FactoryMap maps variable names to factories. 
This factory map is used to resolve data dependencies of previously defined factories. +/// \param factoryMapOut [out]: FactoryMap maps variable names to factories. New factory entries are added to that FactoryMap. Usually, factoryMapIn and factoryMapOut should use the same object, such that new factories are added. We have to distinguish input and output if we build sub-factory managers, though. +/// \param factoryManagers [in/out]: FacotryManagerMap maps group names to a FactoryManager object. +/// +/// Interpret "Factories" parameter list. For each "factory" entry, add a new entry in the factoryMapOut map or create a new FacotryManager +/// +/// Parameter List Parsing: +/// Create an entry in factoryMapOut for each parameter of the list paramList +/// --------- +/// +/// +/// +/// +/// +/// ... +/// +/// +/// +/// --------- +/// Group factories +/// We can group factories using parameter sublists with the "group" parameter +/// +/// +/// +/// +/// +/// +/// +/// +/// + +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// These factory groups can be used with factories for blocked operators (such as the BlockedPFactory) +/// to easily define the operations on the sub-blocks. +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. +/// But using blocks has the advantage that one can reuse them in all blocked factories. +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. +/// +/// + +/// --------- +/// add more dependencies (circular dependencies) +/// +/// The NullspaceFactory needs to know which factory generates the null space on the coarse level (e.g., the TentativePFactory or the RebalancedPFactory). 
+/// However, we cannot set the information in this place in the xml file, since the tentative prolongator facotry is typically defined later. +/// We have to add that dependency later to the NullspaceFactory: +/// +/// +/// +/// +/// +/// +/// +/// +/// <...> +/// +/// +/// +/// +/// +/// +/// <...> +/// +/// +/// +/// After the definition of the generating factory for the nullspace (in this case myRebalanceProlongatorFact) +/// we add that dependency to the NullspaceFactory instance myNspFact +/// +/// +/// +/// +/// +/// +/// We have to create a new block (with a different name than myNspFact). In the example we use "myNspFactDeps". +/// It should contain a parameter "dependency for" with the name of the factory that we want the dependencies to be addded to. +/// With above block we do not need the entry for the Nullspace in the global FactoryManager any more. +template +void ParameterListInterpreter:: + BuildFactoryMap(const ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const { + for (ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { + const std::string& paramName = paramList.name(param); //< paramName contains the user chosen factory name (e.g., "smootherFact1") + const Teuchos::ParameterEntry& paramValue = paramList.entry(param); //< for factories, paramValue should be either a list or just a MueLu Factory (e.g., TrilinosSmoother) + + // TODO: do not allow name of existing MueLu classes (can be tested using FactoryFactory) + + if (paramValue.isList()) { + ParameterList paramList1 = Teuchos::getValue(paramValue); + if (paramList1.isParameter("factory")) { // default: just a factory definition + // New Factory is a sublist with internal parameters and/or data dependencies + TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("dependency for") == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): It seems that in the parameter 
lists for defining " << paramName << " there is both a 'factory' and 'dependency for' parameter. This is not allowed. Please remove the 'dependency for' parameter."); + factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); - //TODO: static? - /// \brief Interpret "Factories" sublist - /// - /// \param paramList [in]: "Factories" ParameterList - /// \param factoryMapIn [in]: FactoryMap maps variable names to factories. This factory map is used to resolve data dependencies of previously defined factories. - /// \param factoryMapOut [out]: FactoryMap maps variable names to factories. New factory entries are added to that FactoryMap. Usually, factoryMapIn and factoryMapOut should use the same object, such that new factories are added. We have to distinguish input and output if we build sub-factory managers, though. - /// \param factoryManagers [in/out]: FacotryManagerMap maps group names to a FactoryManager object. - /// - /// Interpret "Factories" parameter list. For each "factory" entry, add a new entry in the factoryMapOut map or create a new FacotryManager - /// - /// Parameter List Parsing: - /// Create an entry in factoryMapOut for each parameter of the list paramList - /// --------- - /// - /// - /// - /// - /// - /// ... - /// - /// - /// - /// --------- - /// Group factories - /// We can group factories using parameter sublists with the "group" parameter - /// - /// - /// - /// - /// - /// - /// - /// - /// - - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// These factory groups can be used with factories for blocked operators (such as the BlockedPFactory) - /// to easily define the operations on the sub-blocks. - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. - /// But using blocks has the advantage that one can reuse them in all blocked factories. 
- /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. - /// - /// - - /// --------- - /// add more dependencies (circular dependencies) - /// - /// The NullspaceFactory needs to know which factory generates the null space on the coarse level (e.g., the TentativePFactory or the RebalancedPFactory). - /// However, we cannot set the information in this place in the xml file, since the tentative prolongator facotry is typically defined later. - /// We have to add that dependency later to the NullspaceFactory: - /// - /// - /// - /// - /// - /// - /// - /// - /// <...> - /// - /// - /// - /// - /// - /// - /// <...> - /// - /// - /// - /// After the definition of the generating factory for the nullspace (in this case myRebalanceProlongatorFact) - /// we add that dependency to the NullspaceFactory instance myNspFact - /// - /// - /// - /// - /// - /// - /// We have to create a new block (with a different name than myNspFact). In the example we use "myNspFactDeps". - /// It should contain a parameter "dependency for" with the name of the factory that we want the dependencies to be addded to. - /// With above block we do not need the entry for the Nullspace in the global FactoryManager any more. 
- template - void ParameterListInterpreter:: - BuildFactoryMap(const ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const { - for (ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { - const std::string & paramName = paramList.name(param); //< paramName contains the user chosen factory name (e.g., "smootherFact1") - const Teuchos::ParameterEntry & paramValue = paramList.entry(param); //< for factories, paramValue should be either a list or just a MueLu Factory (e.g., TrilinosSmoother) - - //TODO: do not allow name of existing MueLu classes (can be tested using FactoryFactory) - - if (paramValue.isList()) { - ParameterList paramList1 = Teuchos::getValue(paramValue); - if (paramList1.isParameter("factory")) { // default: just a factory definition - // New Factory is a sublist with internal parameters and/or data dependencies - TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("dependency for") == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << - " there is both a 'factory' and 'dependency for' parameter. This is not allowed. Please remove the 'dependency for' parameter."); - - factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); - - } else if (paramList1.isParameter("dependency for")) { // add more data dependencies to existing factory - TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("factory") == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << - " there is both a 'factory' and 'dependency for' parameter. 
This is not allowed."); - - std::string factoryName = paramList1.get("dependency for"); - - RCP factbase = factoryMapIn.find(factoryName /*paramName*/)->second; // access previously defined factory - TEUCHOS_TEST_FOR_EXCEPTION(factbase.is_null() == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): could not find factory " + factoryName + " in factory map. Did you define it before?"); - - RCP factoryconst = Teuchos::rcp_dynamic_cast(factbase); - RCP< Factory> factory = Teuchos::rcp_const_cast(factoryconst); - - // Read the RCP parameters of the class T - RCP validParamList = factory->GetValidParameterList(); - for (ParameterList::ConstIterator vparam = validParamList->begin(); vparam != validParamList->end(); ++vparam) { - const std::string& pName = validParamList->name(vparam); - - if (!paramList1.isParameter(pName)) { - // Ignore unknown parameters - continue; - } + } else if (paramList1.isParameter("dependency for")) { // add more data dependencies to existing factory + TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("factory") == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << " there is both a 'factory' and 'dependency for' parameter. This is not allowed."); - if (validParamList->isType< RCP >(pName)) { - // Generate or get factory described by pName and set dependency - RCP generatingFact = factFact_->BuildFactory(paramList1.getEntry(pName), factoryMapIn, factoryManagers); - factory->SetFactory(pName, generatingFact.create_weak()); - - } else if (validParamList->isType >(pName)) { - if (pName == "ParameterList") { - // NOTE: we cannot use - // subList = sublist(rcpFromRef(paramList), pName) - // here as that would result in sublist also being a reference to a temporary object. 
- // The resulting dereferencing in the corresponding factory would then segfault - RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList1)), pName); - factory->SetParameter(pName, ParameterEntry(subList)); - } - } else { - factory->SetParameter(pName, paramList1.getEntry(pName)); - } + std::string factoryName = paramList1.get("dependency for"); + + RCP factbase = factoryMapIn.find(factoryName /*paramName*/)->second; // access previously defined factory + TEUCHOS_TEST_FOR_EXCEPTION(factbase.is_null() == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): could not find factory " + factoryName + " in factory map. Did you define it before?"); + + RCP factoryconst = Teuchos::rcp_dynamic_cast(factbase); + RCP factory = Teuchos::rcp_const_cast(factoryconst); + + // Read the RCP parameters of the class T + RCP validParamList = factory->GetValidParameterList(); + for (ParameterList::ConstIterator vparam = validParamList->begin(); vparam != validParamList->end(); ++vparam) { + const std::string& pName = validParamList->name(vparam); + + if (!paramList1.isParameter(pName)) { + // Ignore unknown parameters + continue; } - } else if (paramList1.isParameter("group")) { // definitiion of a factory group (for a factory manager) - // Define a new (sub) FactoryManager - std::string groupType = paramList1.get("group"); - TEUCHOS_TEST_FOR_EXCEPTION(groupType!="FactoryManager", Exceptions::RuntimeError, - "group must be of type \"FactoryManager\"."); - - ParameterList groupList = paramList1; // copy because list temporally modified (remove 'id') - groupList.remove("group"); - - bool setKokkosRefactor = false; - bool kokkosRefactor = useKokkos_; - if (groupList.isParameter("use kokkos refactor")) { - kokkosRefactor = groupList.get("use kokkos refactor"); - groupList.remove("use kokkos refactor"); - setKokkosRefactor = true; + if (validParamList->isType >(pName)) { + // Generate or get factory described by pName and set dependency + RCP generatingFact = 
factFact_->BuildFactory(paramList1.getEntry(pName), factoryMapIn, factoryManagers); + factory->SetFactory(pName, generatingFact.create_weak()); + + } else if (validParamList->isType >(pName)) { + if (pName == "ParameterList") { + // NOTE: we cannot use + // subList = sublist(rcpFromRef(paramList), pName) + // here as that would result in sublist also being a reference to a temporary object. + // The resulting dereferencing in the corresponding factory would then segfault + RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList1)), pName); + factory->SetParameter(pName, ParameterEntry(subList)); + } + } else { + factory->SetParameter(pName, paramList1.getEntry(pName)); } + } - FactoryMap groupFactoryMap; - BuildFactoryMap(groupList, factoryMapIn, groupFactoryMap, factoryManagers); + } else if (paramList1.isParameter("group")) { // definitiion of a factory group (for a factory manager) + // Define a new (sub) FactoryManager + std::string groupType = paramList1.get("group"); + TEUCHOS_TEST_FOR_EXCEPTION(groupType != "FactoryManager", Exceptions::RuntimeError, + "group must be of type \"FactoryManager\"."); + + ParameterList groupList = paramList1; // copy because list temporally modified (remove 'id') + groupList.remove("group"); + + bool setKokkosRefactor = false; + bool kokkosRefactor = useKokkos_; + if (groupList.isParameter("use kokkos refactor")) { + kokkosRefactor = groupList.get("use kokkos refactor"); + groupList.remove("use kokkos refactor"); + setKokkosRefactor = true; + } - // do not store groupFactoryMap in factoryMapOut - // Create a factory manager object from groupFactoryMap - RCP m = rcp(new FactoryManager(groupFactoryMap)); - if (setKokkosRefactor) - m->SetKokkosRefactor(kokkosRefactor); - factoryManagers[paramName] = m; + FactoryMap groupFactoryMap; + BuildFactoryMap(groupList, factoryMapIn, groupFactoryMap, factoryManagers); + + // do not store groupFactoryMap in factoryMapOut + // Create a factory manager object from groupFactoryMap + RCP m 
= rcp(new FactoryManager(groupFactoryMap)); + if (setKokkosRefactor) + m->SetKokkosRefactor(kokkosRefactor); + factoryManagers[paramName] = m; - } else { - this->GetOStream(Warnings0) << "Could not interpret parameter list " << paramList1 << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, - "XML Parameter list must either be of type \"factory\" or of type \"group\"."); - } } else { - // default: just a factory (no parameter list) - factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); + this->GetOStream(Warnings0) << "Could not interpret parameter list " << paramList1 << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, + "XML Parameter list must either be of type \"factory\" or of type \"group\"."); } + } else { + // default: just a factory (no parameter list) + factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); } } - - // ===================================================================================================== - // ======================================= MISC functions ============================================== - // ===================================================================================================== - template - void ParameterListInterpreter::SetupOperator(Operator& Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blockSize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blockSize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl - << "You may want to check \"number of equations\" (or \"PDE equations\" for factory style list) parameter." 
<< std::endl; - - A.SetFixedBlockSize(blockSize_, dofOffset_); +} + +// ===================================================================================================== +// ======================================= MISC functions ============================================== +// ===================================================================================================== +template +void ParameterListInterpreter::SetupOperator(Operator& Op) const { + try { + Matrix& A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blockSize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blockSize_ << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl + << "You may want to check \"number of equations\" (or \"PDE equations\" for factory style list) parameter." << std::endl; + + A.SetFixedBlockSize(blockSize_, dofOffset_); #ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); -#endif // HAVE_MUELU_DEBUG - - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } - } + MatrixUtils::checkLocalRowMapMatchesColMap(A); +#endif // HAVE_MUELU_DEBUG - template - void ParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { - H.SetCycle(Cycle_); - H.SetCycleStartLevel(WCycleStartLevel_); - H.SetProlongatorScalingFactor(scalingFactor_); - HierarchyManager::SetupHierarchy(H); + } catch (std::bad_cast&) { + this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; } - - static bool compare(const ParameterList& list1, const ParameterList& list2) { - // First loop through and validate the parameters at this level. 
- // In addition, we generate a list of sublists that we will search next - for (ParameterList::ConstIterator it = list1.begin(); it != list1.end(); it++) { - const std::string& name = it->first; - const Teuchos::ParameterEntry& entry1 = it->second; - - const Teuchos::ParameterEntry *entry2 = list2.getEntryPtr(name); - if (!entry2) // entry is not present in the second list - return false; - if (entry1.isList() && entry2->isList()) { // sublist check - compare(Teuchos::getValue(entry1), Teuchos::getValue(*entry2)); - continue; - } - if (entry1.getAny(false) != entry2->getAny(false)) // entries have different types or different values - return false; +} + +template +void ParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { + H.SetCycle(Cycle_); + H.SetCycleStartLevel(WCycleStartLevel_); + H.SetProlongatorScalingFactor(scalingFactor_); + HierarchyManager::SetupHierarchy(H); +} + +static bool compare(const ParameterList& list1, const ParameterList& list2) { + // First loop through and validate the parameters at this level. 
+ // In addition, we generate a list of sublists that we will search next + for (ParameterList::ConstIterator it = list1.begin(); it != list1.end(); it++) { + const std::string& name = it->first; + const Teuchos::ParameterEntry& entry1 = it->second; + + const Teuchos::ParameterEntry* entry2 = list2.getEntryPtr(name); + if (!entry2) // entry is not present in the second list + return false; + if (entry1.isList() && entry2->isList()) { // sublist check + compare(Teuchos::getValue(entry1), Teuchos::getValue(*entry2)); + continue; } - - return true; + if (entry1.getAny(false) != entry2->getAny(false)) // entries have different types or different values + return false; } - static inline bool areSame(const ParameterList& list1, const ParameterList& list2) { - return compare(list1, list2) && compare(list2, list1); - } + return true; +} + +static inline bool areSame(const ParameterList& list1, const ParameterList& list2) { + return compare(list1, list2) && compare(list2, list1); +} -} // namespace MueLu +} // namespace MueLu #define MUELU_PARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_PARAMETERLISTINTERPRETER_DEF_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp b/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp index 051a2dfd622e..6a0b847ca311 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp @@ -49,137 +49,133 @@ namespace MueLu { - /* See also: ML_Epetra::UpdateList */ - //! @brief: merge two parameter lists - //! - //! @param source [in]: parameter lists with source parameters which are to be merged in into the dest parameter list - //! @param dest [in,out]: parameter list with, e.g., default parameters which is extended by parameters from source parameter list - //! 
@param overWrite (bool): if true, overwrite parameters in dest with entries from source - void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite){ - for(Teuchos::ParameterList::ConstIterator param=source.begin(); param!=source.end(); ++param) - if (dest.isParameter(source.name(param)) == false || overWrite) - dest.setEntry(source.name(param),source.entry(param)); - } - - void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList) - { - using Teuchos::ParameterList; - using std::string; +/* See also: ML_Epetra::UpdateList */ +//! @brief: merge two parameter lists +//! +//! @param source [in]: parameter lists with source parameters which are to be merged in into the dest parameter list +//! @param dest [in,out]: parameter list with, e.g., default parameters which is extended by parameters from source parameter list +//! @param overWrite (bool): if true, overwrite parameters in dest with entries from source +void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite) { + for (Teuchos::ParameterList::ConstIterator param = source.begin(); param != source.end(); ++param) + if (dest.isParameter(source.name(param)) == false || overWrite) + dest.setEntry(source.name(param), source.entry(param)); +} + +void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList) { + using std::string; + using Teuchos::ParameterList; + + newList.setName(List.name()); + + // Copy general (= not level-specific) options and sublists to the new list. + // - Coarse and level-specific parameters are not copied yet. They will be moved to sublists later. 
+ // - Already existing level-specific lists are copied to the new list but the coarse list is not copied + // yet because it has to be modified before copy (s/coarse/smoother/) + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); ++param) { + const string &pname = List.name(param); + + if ((pname.find(" (level", 0) == string::npos || pname.find("smoother: list (level", 0) == 0 || pname.find("aggregation: list (level", 0) == 0) && + (pname.find("coarse: ", 0) == string::npos)) { + newList.setEntry(pname, List.entry(param)); + } + } // for + + // Copy of the sublist "coarse: list" to the new list. Change "coarse:" to "smoother:" along the way. + if (List.isSublist("coarse: list")) { + const ParameterList &coarseList = List.sublist("coarse: list"); + ParameterList &newCoarseList = newList.sublist("coarse: list"); + for (ParameterList::ConstIterator param = coarseList.begin(); param != coarseList.end(); ++param) { + const string &pname = coarseList.name(param); + + if (pname.find("coarse:", 0) == 0) { + // change "coarse: " to "smoother:" + newCoarseList.setEntry("smoother: " + pname.substr(8), coarseList.entry(param)); + } else { + newCoarseList.setEntry(pname, coarseList.entry(param)); + } + } + } // if - newList.setName(List.name()); + // Copy of level-specific parameters and coarse parameters to sublist + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); ++param) { + const string &pname = List.name(param); + if (pname.find(" (level", 0) != string::npos && pname.find("smoother: list (level", 0) != 0 && pname.find("aggregation: list (level", 0) != 0) { + // Copy level-specific parameters (smoother and aggregation) - // Copy general (= not level-specific) options and sublists to the new list. - // - Coarse and level-specific parameters are not copied yet. They will be moved to sublists later. 
- // - Already existing level-specific lists are copied to the new list but the coarse list is not copied - // yet because it has to be modified before copy (s/coarse/smoother/) - for (ParameterList::ConstIterator param=List.begin(); param!=List.end(); ++param) + // Scan pname (ex: pname="smoother: type (level 2)") + string type, option; + int levelID = -1; { - const string & pname=List.name(param); - - if ((pname.find(" (level",0) == string::npos || pname.find("smoother: list (level",0) == 0 || pname.find("aggregation: list (level",0) == 0) && - (pname.find("coarse: ",0) == string::npos)) - { - newList.setEntry(pname,List.entry(param)); - } - } // for - - // Copy of the sublist "coarse: list" to the new list. Change "coarse:" to "smoother:" along the way. - if (List.isSublist("coarse: list")) { - const ParameterList &coarseList = List.sublist("coarse: list"); - ParameterList &newCoarseList = newList.sublist("coarse: list"); - for (ParameterList::ConstIterator param=coarseList.begin(); param!=coarseList.end() ; ++param) { - const string & pname=coarseList.name(param); - - if (pname.find("coarse:",0) == 0) { - // change "coarse: " to "smoother:" - newCoarseList.setEntry("smoother: "+pname.substr(8),coarseList.entry(param)); - } else { - newCoarseList.setEntry(pname,coarseList.entry(param)); + typedef Teuchos::ArrayRCP::size_type size_type; // (!) 
+ Teuchos::Array ctype(size_type(pname.size() + 1)); + Teuchos::Array coption(size_type(pname.size() + 1)); + + int matched = sscanf(pname.c_str(), "%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") + type = string(ctype.getRawPtr()); + option = string(coption.getRawPtr()); + option.resize(option.size() - 1); // remove final white-space + + if (matched != 3 || (type != "smoother:" && type != "aggregation:")) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". " + << "Error in creating level-specific sublists" << std::endl + << "Offending parameter: " << pname << std::endl); } } - } // if - // Copy of level-specific parameters and coarse parameters to sublist - for (ParameterList::ConstIterator param=List.begin(); param!=List.end(); ++param) - { - const string & pname=List.name(param); - if (pname.find(" (level",0) != string::npos && pname.find("smoother: list (level",0) != 0 && pname.find("aggregation: list (level",0) != 0) - { - // Copy level-specific parameters (smoother and aggregation) - - // Scan pname (ex: pname="smoother: type (level 2)") - string type, option; - int levelID=-1; - { - typedef Teuchos::ArrayRCP::size_type size_type; // (!) - Teuchos::Array ctype (size_type(pname.size()+1)); - Teuchos::Array coption(size_type(pname.size()+1)); - - int matched = sscanf(pname.c_str(),"%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") - type = string(ctype.getRawPtr()); - option = string(coption.getRawPtr()); option.resize(option.size () - 1); // remove final white-space - - if (matched != 3 || (type != "smoother:" && type != "aggregation:")) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". 
" - << "Error in creating level-specific sublists" << std::endl - << "Offending parameter: " << pname << std::endl); - } - } - - // Create/grab the corresponding sublist of newList - ParameterList &newSubList = newList.sublist(type + " list (level " + Teuchos::toString(levelID) + ")"); - // Shove option w/o level number into sublist - newSubList.setEntry(type + " " + option,List.entry(param)); - - } else if (pname.find("coarse:",0) == 0 && pname != "coarse: list") { - // Copy coarse parameters - ParameterList &newCoarseList = newList.sublist("coarse: list"); // the coarse sublist is created only if there is at least one "coarse:" parameter - newCoarseList.setEntry("smoother: "+pname.substr(8),List.entry(param)); // change "coarse: " to "smoother:" - } // end if - - } // for - - } //MueLu::CreateSublist() - - // Usage: GetMLSubList(paramList, "smoother", 2); - const Teuchos::ParameterList & GetMLSubList(const Teuchos::ParameterList & paramList, const std::string & type, int levelID) { - static const Teuchos::ParameterList emptyParamList; - - char levelChar[11]; - sprintf(levelChar, "(level %d)", levelID); - std::string levelStr(levelChar); - - if (paramList.isSublist(type + ": list " + levelStr)) { - return paramList.sublist(type + ": list " + levelStr); - } else { - return emptyParamList; - } - } + // Create/grab the corresponding sublist of newList + ParameterList &newSubList = newList.sublist(type + " list (level " + Teuchos::toString(levelID) + ")"); + // Shove option w/o level number into sublist + newSubList.setEntry(type + " " + option, List.entry(param)); - // Extract all the parameters that begin with "str:" (but skip sublist) - Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList & paramList, const std::string & str) { - Teuchos::RCP subList = rcp(new Teuchos::ParameterList()); + } else if (pname.find("coarse:", 0) == 0 && pname != "coarse: list") { + // Copy coarse parameters + ParameterList &newCoarseList = newList.sublist("coarse: list"); // 
the coarse sublist is created only if there is at least one "coarse:" parameter + newCoarseList.setEntry("smoother: " + pname.substr(8), List.entry(param)); // change "coarse: " to "smoother:" + } // end if - for (Teuchos::ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { - const Teuchos::ParameterEntry & entry = paramList.entry(param); - const std::string & pname = paramList.name(param); - if (pname.find(str+":",0) == 0 && !entry.isList()) { - subList->setEntry(pname,entry); - } - } + } // for + +} // MueLu::CreateSublist() + +// Usage: GetMLSubList(paramList, "smoother", 2); +const Teuchos::ParameterList &GetMLSubList(const Teuchos::ParameterList ¶mList, const std::string &type, int levelID) { + static const Teuchos::ParameterList emptyParamList; - return subList; + char levelChar[11]; + sprintf(levelChar, "(level %d)", levelID); + std::string levelStr(levelChar); + + if (paramList.isSublist(type + ": list " + levelStr)) { + return paramList.sublist(type + ": list " + levelStr); + } else { + return emptyParamList; } +} + +// Extract all the parameters that begin with "str:" (but skip sublist) +Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList ¶mList, const std::string &str) { + Teuchos::RCP subList = rcp(new Teuchos::ParameterList()); - // replace all string occurrences "from" with "to" in "str" - void replaceAll(std::string& str, const std::string& from, const std::string& to) { - if(from.empty()) - return; - size_t start_pos = 0; - while((start_pos = str.find(from, start_pos)) != std::string::npos) { - str.replace(start_pos, from.length(), to); - start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx' + for (Teuchos::ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { + const Teuchos::ParameterEntry &entry = paramList.entry(param); + const std::string &pname = paramList.name(param); + if (pname.find(str + ":", 0) == 0 && 
!entry.isList()) { + subList->setEntry(pname, entry); } } -} // namespace MueLu + return subList; +} + +// replace all string occurrences "from" with "to" in "str" +void replaceAll(std::string &str, const std::string &from, const std::string &to) { + if (from.empty()) + return; + size_t start_pos = 0; + while ((start_pos = str.find(from, start_pos)) != std::string::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx' + } +} + +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp b/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp index 9d55b472ad02..e99129d3305d 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp @@ -56,47 +56,46 @@ namespace MueLu { - /* See also: ML_Epetra::UpdateList */ - void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite); +/* See also: ML_Epetra::UpdateList */ +void MergeParameterList(const Teuchos::ParameterList& source, Teuchos::ParameterList& dest, bool overWrite); - void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList); +void CreateSublists(const Teuchos::ParameterList& List, Teuchos::ParameterList& newList); - // Usage: GetMLSubList(paramList, "smoother", 2); - const Teuchos::ParameterList & GetMLSubList(const Teuchos::ParameterList & paramList, const std::string & type, int levelID); +// Usage: GetMLSubList(paramList, "smoother", 2); +const Teuchos::ParameterList& GetMLSubList(const Teuchos::ParameterList& paramList, const std::string& type, int levelID); - // Extract all the parameters that begin with "str:" (but skip sublist) - Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList & paramList, const std::string & str); +// Extract all the parameters that begin with "str:" (but skip sublist) +Teuchos::RCP 
ExtractSetOfParameters(const Teuchos::ParameterList& paramList, const std::string& str); - //! replace all string occurrences "from" with "to" in "str" - //! - //! @param str: input and output string - //! @param from: search string - //! @param to: replace with "to" - void replaceAll(std::string& str, const std::string& from, const std::string& to); +//! replace all string occurrences "from" with "to" in "str" +//! +//! @param str: input and output string +//! @param from: search string +//! @param to: replace with "to" +void replaceAll(std::string& str, const std::string& from, const std::string& to); - //! templated version to replace placeholder by data in "str" - template - bool replacePlaceholder(std::string& str, const std::string& placeholder, Type data) { - std::stringstream s; - s << data; - replaceAll(str, placeholder, s.str()); - return true; - } +//! templated version to replace placeholder by data in "str" +template +bool replacePlaceholder(std::string& str, const std::string& placeholder, Type data) { + std::stringstream s; + s << data; + replaceAll(str, placeholder, s.str()); + return true; +} - template - bool actionInterpretParameter(Teuchos::ParameterList& mlParams, const std::string& paramName, std::string& str) { +template +bool actionInterpretParameter(Teuchos::ParameterList& mlParams, const std::string& paramName, std::string& str) { + // MUELU_READ_PARAM(mlParams, paramName, int, 0, data); - //MUELU_READ_PARAM(mlParams, paramName, int, 0, data); + Type varName; // = defaultValue; // extract from master list + if (mlParams.isParameter(paramName)) varName = mlParams.get(paramName); - Type varName; // = defaultValue; // extract from master list - if (mlParams.isParameter(paramName)) varName = mlParams.get(paramName); + std::stringstream placeholder; + placeholder << "$" << paramName << "$"; - std::stringstream placeholder; - placeholder << "$" << paramName << "$"; + return MueLu::replacePlaceholder(str, placeholder.str(), varName); +} - return 
MueLu::replacePlaceholder(str, placeholder.str(), varName); - } +} // namespace MueLu -} // namespace MueLu - -#endif // MUELU_PARAMETERLISTUTILS_HPP +#endif // MUELU_PARAMETERLISTUTILS_HPP diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp index a41ee8b9ae26..a126e4f3ec08 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp @@ -57,103 +57,97 @@ #include #include - #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_Level_fwd.hpp" namespace MueLu { - /*! - @class AggregateQualityEstimateFactory class. - @brief An factory which assigns each aggregate a quality - estimate. Originally developed by Napov and Notay in the - context of plain aggregation, while this quality estimate - does not correspond to a robust convergence guarentee (as - it does for plain aggregation), we find empirically that - it is a good way of discovering poorly constructed aggregates - even in the smoothed aggregation context. - - Napov, A., & Notay, Y. (2012). An algebraic multigrid method - with guaranteed convergence rate. SIAM journal on scientific - computing, 34(2), A1079-A1109. - */ - - template - class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { +/*! + @class AggregateQualityEstimateFactory class. + @brief An factory which assigns each aggregate a quality + estimate. Originally developed by Napov and Notay in the + context of plain aggregation, while this quality estimate + does not correspond to a robust convergence guarentee (as + it does for plain aggregation), we find empirically that + it is a good way of discovering poorly constructed aggregates + even in the smoothed aggregation context. + + Napov, A., & Notay, Y. (2012). An algebraic multigrid method + with guaranteed convergence rate. SIAM journal on scientific + computing, 34(2), A1079-A1109. 
+*/ + +template +class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { #undef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregateQualityEstimateFactory(); + typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; - //! Destructor. - virtual ~AggregateQualityEstimateFactory(); + //! Constructor. + AggregateQualityEstimateFactory(); - //@} + //! Destructor. + virtual ~AggregateQualityEstimateFactory(); - RCP GetValidParameterList() const; + //@} - //! @name Input - //@{ + RCP GetValidParameterList() const; - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level ¤tLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that generate that data. - //@} - - //! @name Build methods. - //@{ - - //! Build aggregate quality esimates with this factory. - void Build(Level & currentLevel) const; - - //@} + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level& currentLevel) const; - //! @name Utility method to convert aggregate data to a convenient format. - //@{ + //@} - //! Build aggregate quality esimates with this factory. - static void ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes); + //! @name Build methods. + //@{ - //@} + //! Build aggregate quality esimates with this factory. 
+ void Build(Level& currentLevel) const; - private: + //@} - //! @name Internal method for computing aggregate quality. - //@{ + //! @name Utility method to convert aggregate data to a convenient format. + //@{ - void ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const; + //! Build aggregate quality esimates with this factory. + static void ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes); - void ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const; + //@} - //@} + private: + //! @name Internal method for computing aggregate quality. + //@{ - //! @name Internal method for outputting aggregate quality - //@{ + void ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const; - void OutputAggQualities(const Level& level, RCP> agg_qualities) const; + void ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const; - - void OutputAggSizes(const Level& level, RCP agg_sizes) const; + //@} + //! @name Internal method for outputting aggregate quality + //@{ + void OutputAggQualities(const Level& level, RCP> agg_qualities) const; - //@} + void OutputAggSizes(const Level& level, RCP agg_sizes) const; + //@} - }; // class AggregateQualityEsimateFactory(); +}; // class AggregateQualityEsimateFactory(); -} // namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT -#endif // MUELU_DEMOFACTORY_DECL_HPP +#endif // MUELU_DEMOFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp index 44b760ea9ee0..e1117f1b6033 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp @@ -63,481 +63,448 @@ namespace MueLu { - template - AggregateQualityEstimateFactory::AggregateQualityEstimateFactory() - { } - - template - 
AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} - - template - void AggregateQualityEstimateFactory::DeclareInput(Level& currentLevel) const { +template +AggregateQualityEstimateFactory::AggregateQualityEstimateFactory() {} - Input(currentLevel, "A"); - Input(currentLevel, "Aggregates"); - Input(currentLevel, "CoarseMap"); +template +AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} - } +template +void AggregateQualityEstimateFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Aggregates"); + Input(currentLevel, "CoarseMap"); +} - template - RCP AggregateQualityEstimateFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP AggregateQualityEstimateFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregate qualities: good aggregate threshold"); - SET_VALID_ENTRY("aggregate qualities: file output"); - SET_VALID_ENTRY("aggregate qualities: file base"); - SET_VALID_ENTRY("aggregate qualities: check symmetry"); - SET_VALID_ENTRY("aggregate qualities: algorithm"); - SET_VALID_ENTRY("aggregate qualities: zero threshold"); - SET_VALID_ENTRY("aggregate qualities: percentiles"); - SET_VALID_ENTRY("aggregate qualities: mode"); - -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); - validParamList->set< RCP >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - - return validParamList; - } + SET_VALID_ENTRY("aggregate qualities: good aggregate threshold"); + SET_VALID_ENTRY("aggregate qualities: file output"); + SET_VALID_ENTRY("aggregate qualities: file base"); + SET_VALID_ENTRY("aggregate qualities: check 
symmetry"); + SET_VALID_ENTRY("aggregate qualities: algorithm"); + SET_VALID_ENTRY("aggregate qualities: zero threshold"); + SET_VALID_ENTRY("aggregate qualities: percentiles"); + SET_VALID_ENTRY("aggregate qualities: mode"); + +#undef SET_VALID_ENTRY + + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("Aggregates", Teuchos::null, "Generating factory of the aggregates"); + validParamList->set>("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + + return validParamList; +} +template +void AggregateQualityEstimateFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - template - void AggregateQualityEstimateFactory::Build(Level & currentLevel) const { + RCP A = Get>(currentLevel, "A"); + RCP aggregates = Get>(currentLevel, "Aggregates"); - FactoryMonitor m(*this, "Build", currentLevel); + RCP map = Get>(currentLevel, "CoarseMap"); - RCP A = Get>(currentLevel, "A"); - RCP aggregates = Get>(currentLevel, "Aggregates"); + assert(!aggregates->AggregatesCrossProcessors()); + ParameterList pL = GetParameterList(); + std::string mode = pL.get("aggregate qualities: mode"); + GetOStream(Statistics1) << "AggregateQuality: mode " << mode << std::endl; - RCP map = Get< RCP >(currentLevel, "CoarseMap"); + RCP> aggregate_qualities; + if (mode == "eigenvalue" || mode == "both") { + aggregate_qualities = Xpetra::MultiVectorFactory::Build(map, 1); + ComputeAggregateQualities(A, aggregates, aggregate_qualities); + OutputAggQualities(currentLevel, aggregate_qualities); + } + if (mode == "size" || mode == "both") { + RCP aggregate_sizes = Xpetra::VectorFactory::Build(map); + ComputeAggregateSizes(A, aggregates, aggregate_sizes); + Set(currentLevel, "AggregateSizes", aggregate_sizes); + OutputAggSizes(currentLevel, aggregate_sizes); + } + Set(currentLevel, "AggregateQualities", aggregate_qualities); +} +template +void AggregateQualityEstimateFactory::ConvertAggregatesData(RCP 
aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes) { + // Reorder local aggregate information into a format amenable to computing + // per-aggregate quantities. Specifically, we compute a format + // similar to compressed sparse row format for sparse matrices in which + // we store all the local vertices in a single array in blocks corresponding + // to aggregates. (This array is aggSortedVertices.) We then store a second + // array (aggsToIndices) whose k-th element stores the index of the first + // vertex in aggregate k in the array aggSortedVertices. - assert(!aggregates->AggregatesCrossProcessors()); - ParameterList pL = GetParameterList(); - std::string mode = pL.get("aggregate qualities: mode"); - GetOStream(Statistics1) << "AggregateQuality: mode "<::zero(); + const LO LO_ONE = Teuchos::OrdinalTraits::one(); - RCP> aggregate_qualities; - if(mode == "eigenvalue" || mode == "both") { - aggregate_qualities = Xpetra::MultiVectorFactory::Build(map, 1); - ComputeAggregateQualities(A, aggregates, aggregate_qualities); - OutputAggQualities(currentLevel, aggregate_qualities); - } - if(mode == "size" || mode =="both") { - RCP aggregate_sizes = Xpetra::VectorFactory::Build(map); - ComputeAggregateSizes(A,aggregates,aggregate_sizes); - Set(currentLevel, "AggregateSizes",aggregate_sizes); - OutputAggSizes(currentLevel, aggregate_sizes); - } - Set(currentLevel, "AggregateQualities", aggregate_qualities); + LO numAggs = aggs->GetNumAggregates(); + aggSizes = aggs->ComputeAggregateSizesArrayRCP(); + aggsToIndices = ArrayRCP(numAggs + LO_ONE, LO_ZERO); + for (LO i = 0; i < numAggs; ++i) { + aggsToIndices[i + LO_ONE] = aggsToIndices[i] + aggSizes[i]; } - template - void AggregateQualityEstimateFactory::ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes) { + const RCP vertex2AggId = aggs->GetVertex2AggId(); + const ArrayRCP vertex2AggIdData = vertex2AggId->getData(0); - // Reorder local 
aggregate information into a format amenable to computing - // per-aggregate quantities. Specifically, we compute a format - // similar to compressed sparse row format for sparse matrices in which - // we store all the local vertices in a single array in blocks corresponding - // to aggregates. (This array is aggSortedVertices.) We then store a second - // array (aggsToIndices) whose k-th element stores the index of the first - // vertex in aggregate k in the array aggSortedVertices. + LO numNodes = vertex2AggId->getLocalLength(); + aggSortedVertices = ArrayRCP(numNodes, -LO_ONE); + std::vector vertexInsertionIndexByAgg(numNodes, LO_ZERO); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - const LO LO_ONE = Teuchos::OrdinalTraits::one(); + for (LO i = 0; i < numNodes; ++i) { + LO aggId = vertex2AggIdData[i]; + if (aggId < 0 || aggId >= numAggs) continue; - LO numAggs = aggs->GetNumAggregates(); - aggSizes = aggs->ComputeAggregateSizesArrayRCP(); + aggSortedVertices[aggsToIndices[aggId] + vertexInsertionIndexByAgg[aggId]] = i; + vertexInsertionIndexByAgg[aggId]++; + } +} - aggsToIndices = ArrayRCP(numAggs+LO_ONE,LO_ZERO); +template +void AggregateQualityEstimateFactory::ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const { + const SC SCALAR_ONE = Teuchos::ScalarTraits::one(); + const SC SCALAR_TWO = SCALAR_ONE + SCALAR_ONE; + + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + const LO LO_ONE = Teuchos::OrdinalTraits::one(); + + using MT = magnitudeType; + const MT MT_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ONE = Teuchos::ScalarTraits::one(); + ParameterList pL = GetParameterList(); + + RCP AT = A; + + // Algorithm check + std::string algostr = pL.get("aggregate qualities: algorithm"); + MT zeroThreshold = Teuchos::as(pL.get("aggregate qualities: zero threshold")); + enum AggAlgo { ALG_FORWARD = 0, + ALG_REVERSE }; + AggAlgo algo; + if (algostr == "forward") { + algo = ALG_FORWARD; + GetOStream(Statistics1) << "AggregateQuality: 
Using 'forward' algorithm" << std::endl; + } else if (algostr == "reverse") { + algo = ALG_REVERSE; + GetOStream(Statistics1) << "AggregateQuality: Using 'reverse' algorithm" << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "\"algorithm\" must be one of (forward|reverse)"); + } - for (LO i=0;i("aggregate qualities: check symmetry"); + if (check_symmetry) { + RCP x = MultiVectorFactory::Build(A->getMap(), 1, false); + x->Xpetra_randomize(); - const RCP vertex2AggId = aggs->GetVertex2AggId(); - const ArrayRCP vertex2AggIdData = vertex2AggId->getData(0); + RCP tmp = MultiVectorFactory::Build(A->getMap(), 1, false); - LO numNodes = vertex2AggId->getLocalLength(); - aggSortedVertices = ArrayRCP(numNodes,-LO_ONE); - std::vector vertexInsertionIndexByAgg(numNodes,LO_ZERO); + A->apply(*x, *tmp, Teuchos::NO_TRANS); // tmp now stores A*x + A->apply(*x, *tmp, Teuchos::TRANS, -SCALAR_ONE, SCALAR_ONE); // tmp now stores A*x - A^T*x - for (LO i=0;i tmp_norm(1); + tmp->norm2(tmp_norm()); - LO aggId = vertex2AggIdData[i]; - if (aggId<0 || aggId>=numAggs) continue; + Array x_norm(1); + tmp->norm2(x_norm()); - aggSortedVertices[aggsToIndices[aggId]+vertexInsertionIndexByAgg[aggId]] = i; - vertexInsertionIndexByAgg[aggId]++; + if (tmp_norm[0] > 1e-10 * x_norm[0]) { + std::string transpose_string = "transpose"; + RCP whatever; + AT = Utilities::Transpose(*rcp_const_cast(A), true, transpose_string, whatever); + assert(A->getMap()->isSameAs(*(AT->getMap()))); } - - } - template - void AggregateQualityEstimateFactory::ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const { + // Reorder local aggregate information into a format amenable to computing + // per-aggregate quantities. Specifically, we compute a format + // similar to compressed sparse row format for sparse matrices in which + // we store all the local vertices in a single array in blocks corresponding + // to aggregates. (This array is aggSortedVertices.) 
We then store a second + // array (aggsToIndices) whose k-th element stores the index of the first + // vertex in aggregate k in the array aggSortedVertices. - const SC SCALAR_ONE = Teuchos::ScalarTraits::one(); - const SC SCALAR_TWO = SCALAR_ONE + SCALAR_ONE; + ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; + ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - const LO LO_ONE = Teuchos::OrdinalTraits::one(); + LO numAggs = aggs->GetNumAggregates(); - using MT = magnitudeType; - const MT MT_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ONE = Teuchos::ScalarTraits::one(); - ParameterList pL = GetParameterList(); + // Compute the per-aggregate quality estimate - RCP AT = A; + typedef Teuchos::SerialDenseMatrix DenseMatrix; + typedef Teuchos::SerialDenseVector DenseVector; - // Algorithm check - std::string algostr = pL.get("aggregate qualities: algorithm"); - MT zeroThreshold = Teuchos::as(pL.get("aggregate qualities: zero threshold")); - enum AggAlgo {ALG_FORWARD=0, ALG_REVERSE}; - AggAlgo algo; - if(algostr == "forward") {algo = ALG_FORWARD; GetOStream(Statistics1) << "AggregateQuality: Using 'forward' algorithm" << std::endl;} - else if(algostr == "reverse") {algo = ALG_REVERSE; GetOStream(Statistics1) << "AggregateQuality: Using 'reverse' algorithm" << std::endl;} - else { - TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "\"algorithm\" must be one of (forward|reverse)"); - } + ArrayView rowIndices; + ArrayView rowValues; + ArrayView colValues; + Teuchos::LAPACK myLapack; - bool check_symmetry = pL.get("aggregate qualities: check symmetry"); - if (check_symmetry) { + // Iterate over each aggregate to compute the quality estimate + for (LO aggId = LO_ZERO; aggId < numAggs; ++aggId) { + LO aggSize = aggSizes[aggId]; + DenseMatrix A_aggPart(aggSize, aggSize, true); + DenseVector offDiagonalAbsoluteSums(aggSize, true); - RCP x = MultiVectorFactory::Build(A->getMap(), 1, 
false); - x->Xpetra_randomize(); - - RCP tmp = MultiVectorFactory::Build(A->getMap(), 1, false); - - A->apply(*x, *tmp, Teuchos::NO_TRANS); // tmp now stores A*x - A->apply(*x, *tmp, Teuchos::TRANS, -SCALAR_ONE, SCALAR_ONE); // tmp now stores A*x - A^T*x + // Iterate over each node in the aggregate + for (LO idx = LO_ZERO; idx < aggSize; ++idx) { + LO nodeId = aggSortedVertices[idx + aggsToIndices[aggId]]; + A->getLocalRowView(nodeId, rowIndices, rowValues); + AT->getLocalRowView(nodeId, rowIndices, colValues); + + // Iterate over each element in the row corresponding to the current node + for (LO elem = LO_ZERO; elem < rowIndices.size(); ++elem) { + LO nodeId2 = rowIndices[elem]; + SC val = (rowValues[elem] + colValues[elem]) / SCALAR_TWO; + + LO idxInAgg = -LO_ONE; // -1 if element is not in aggregate + + // Check whether the element belongs in the aggregate. If it does + // find, its index. Otherwise, add it's value to the off diagonal + // sums + for (LO idx2 = LO_ZERO; idx2 < aggSize; ++idx2) { + if (aggSortedVertices[idx2 + aggsToIndices[aggId]] == nodeId2) { + // Element does belong to aggregate + idxInAgg = idx2; + break; + } + } - Array tmp_norm(1); - tmp->norm2(tmp_norm()); + if (idxInAgg == -LO_ONE) { // Element does not belong to aggregate - Array x_norm(1); - tmp->norm2(x_norm()); + offDiagonalAbsoluteSums[idx] += Teuchos::ScalarTraits::magnitude(val); - if (tmp_norm[0] > 1e-10*x_norm[0]) { - std::string transpose_string = "transpose"; - RCP whatever; - AT = Utilities::Transpose(*rcp_const_cast(A), true, transpose_string, whatever); + } else { // Element does belong to aggregate - assert(A->getMap()->isSameAs( *(AT->getMap()) )); + A_aggPart(idx, idxInAgg) = Teuchos::ScalarTraits::real(val); + } } - } - // Reorder local aggregate information into a format amenable to computing - // per-aggregate quantities. 
Specifically, we compute a format - // similar to compressed sparse row format for sparse matrices in which - // we store all the local vertices in a single array in blocks corresponding - // to aggregates. (This array is aggSortedVertices.) We then store a second - // array (aggsToIndices) whose k-th element stores the index of the first - // vertex in aggregate k in the array aggSortedVertices. - - ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; - ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - - LO numAggs = aggs->GetNumAggregates(); - - // Compute the per-aggregate quality estimate - - typedef Teuchos::SerialDenseMatrix DenseMatrix; - typedef Teuchos::SerialDenseVector DenseVector; - - ArrayView rowIndices; - ArrayView rowValues; - ArrayView colValues; - Teuchos::LAPACK myLapack; - - // Iterate over each aggregate to compute the quality estimate - for (LO aggId=LO_ZERO; aggIdgetLocalRowView(nodeId, rowIndices, rowValues); - AT->getLocalRowView(nodeId, rowIndices, colValues); - - // Iterate over each element in the row corresponding to the current node - for (LO elem=LO_ZERO; elem::real(A_aggPart(i, i)); + diag_sum += Teuchos::ScalarTraits::real(A_aggPart(i, i)); + } - if (idxInAgg == -LO_ONE) { // Element does not belong to aggregate + DenseMatrix ones(aggSize, aggSize, false); + ones.putScalar(MT_ONE); - offDiagonalAbsoluteSums[idx] += Teuchos::ScalarTraits::magnitude(val); + // Compute matrix on top of generalized Rayleigh quotient + // topMatrix = A_aggPartDiagonal - A_aggPartDiagonal*ones*A_aggPartDiagonal/diag_sum; + DenseMatrix tmp(aggSize, aggSize, false); + DenseMatrix topMatrix(A_aggPartDiagonal); - } else { // Element does belong to aggregate + tmp.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, MT_ONE, ones, A_aggPartDiagonal, MT_ZERO); + topMatrix.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -MT_ONE / diag_sum, A_aggPartDiagonal, tmp, MT_ONE); - A_aggPart(idx,idxInAgg) = Teuchos::ScalarTraits::real(val); + // Compute 
matrix on bottom of generalized Rayleigh quotient + DenseMatrix bottomMatrix(A_aggPart); + MT matrixNorm = A_aggPart.normInf(); - } + // Forward mode: Include a small perturbation to the bottom matrix to make it nonsingular + const MT boost = (algo == ALG_FORWARD) ? (-1e4 * Teuchos::ScalarTraits::eps() * matrixNorm) : MT_ZERO; - } + for (int i = 0; i < aggSize; ++i) { + bottomMatrix(i, i) -= offDiagonalAbsoluteSums(i) + boost; + } + // Compute generalized eigenvalues + LO sdim, info; + DenseVector alpha_real(aggSize, false); + DenseVector alpha_imag(aggSize, false); + DenseVector beta(aggSize, false); + + DenseVector workArray(14 * (aggSize + 1), false); + + LO(*ptr2func) + (MT*, MT*, MT*); + ptr2func = NULL; + LO* bwork = NULL; + MT* vl = NULL; + MT* vr = NULL; + + const char compute_flag = 'N'; + if (algo == ALG_FORWARD) { + // Forward: Solve the generalized eigenvalue problem as is + myLapack.GGES(compute_flag, compute_flag, compute_flag, ptr2func, aggSize, + topMatrix.values(), aggSize, bottomMatrix.values(), aggSize, &sdim, + alpha_real.values(), alpha_imag.values(), beta.values(), vl, aggSize, + vr, aggSize, workArray.values(), workArray.length(), bwork, + &info); + TEUCHOS_ASSERT(info == LO_ZERO); + + MT maxEigenVal = MT_ZERO; + for (int i = LO_ZERO; i < aggSize; ++i) { + // NOTE: In theory, the eigenvalues should be nearly real + // TEUCHOS_ASSERT(fabs(alpha_imag[i]) <= 1e-8*fabs(alpha_real[i])); // Eigenvalues should be nearly real + maxEigenVal = std::max(maxEigenVal, alpha_real[i] / beta[i]); } - // Construct a diagonal matrix consisting of the diagonal - // of A_aggPart - DenseMatrix A_aggPartDiagonal(aggSize, aggSize, true); - MT diag_sum = MT_ZERO; - for (int i=0;i::real(A_aggPart(i,i)); - diag_sum += Teuchos::ScalarTraits::real(A_aggPart(i,i)); + (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE + MT_ONE) * maxEigenVal; + } else { + // Reverse: Swap the top and bottom matrices for the generalized eigenvalue problem + // This is trickier, since we 
need to grab the smallest non-zero eigenvalue and invert it. + myLapack.GGES(compute_flag, compute_flag, compute_flag, ptr2func, aggSize, + bottomMatrix.values(), aggSize, topMatrix.values(), aggSize, &sdim, + alpha_real.values(), alpha_imag.values(), beta.values(), vl, aggSize, + vr, aggSize, workArray.values(), workArray.length(), bwork, + &info); + + TEUCHOS_ASSERT(info == LO_ZERO); + + MT minEigenVal = MT_ZERO; + + for (int i = LO_ZERO; i < aggSize; ++i) { + MT ev = alpha_real[i] / beta[i]; + if (ev > zeroThreshold) { + if (minEigenVal == MT_ZERO) + minEigenVal = ev; + else + minEigenVal = std::min(minEigenVal, ev); + } } + if (minEigenVal == MT_ZERO) + (agg_qualities->getDataNonConst(0))[aggId] = Teuchos::ScalarTraits::rmax(); + else + (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE + MT_ONE) / minEigenVal; + } + } // end aggId loop +} - DenseMatrix ones(aggSize, aggSize, false); - ones.putScalar(MT_ONE); - - // Compute matrix on top of generalized Rayleigh quotient - // topMatrix = A_aggPartDiagonal - A_aggPartDiagonal*ones*A_aggPartDiagonal/diag_sum; - DenseMatrix tmp(aggSize, aggSize, false); - DenseMatrix topMatrix(A_aggPartDiagonal); +template +void AggregateQualityEstimateFactory::OutputAggQualities(const Level& level, RCP> agg_qualities) const { + ParameterList pL = GetParameterList(); - tmp.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, MT_ONE, ones, A_aggPartDiagonal, MT_ZERO); - topMatrix.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -MT_ONE/diag_sum, A_aggPartDiagonal, tmp, MT_ONE); + magnitudeType good_agg_thresh = Teuchos::as(pL.get("aggregate qualities: good aggregate threshold")); + using MT = magnitudeType; - // Compute matrix on bottom of generalized Rayleigh quotient - DenseMatrix bottomMatrix(A_aggPart); - MT matrixNorm = A_aggPart.normInf(); + ArrayRCP data = agg_qualities->getData(0); - // Forward mode: Include a small perturbation to the bottom matrix to make it nonsingular - const MT boost = (algo == ALG_FORWARD) ? 
(-1e4*Teuchos::ScalarTraits::eps()*matrixNorm) : MT_ZERO; + LO num_bad_aggs = 0; + MT worst_agg = 0.0; - for (int i=0;igetDataNonConst(0))[aggId] = (MT_ONE+MT_ONE)*maxEigenVal; - } - else { - // Reverse: Swap the top and bottom matrices for the generalized eigenvalue problem - // This is trickier, since we need to grab the smallest non-zero eigenvalue and invert it. - myLapack.GGES(compute_flag,compute_flag,compute_flag,ptr2func,aggSize, - bottomMatrix.values(),aggSize,topMatrix.values(),aggSize,&sdim, - alpha_real.values(),alpha_imag.values(),beta.values(),vl,aggSize, - vr,aggSize,workArray.values(),workArray.length(),bwork, - &info); - - TEUCHOS_ASSERT(info == LO_ZERO); - - MT minEigenVal = MT_ZERO; - - for (int i=LO_ZERO;i zeroThreshold) { - if (minEigenVal == MT_ZERO) minEigenVal = ev; - else minEigenVal = std::min(minEigenVal,ev); - } - } - if(minEigenVal == MT_ZERO) (agg_qualities->getDataNonConst(0))[aggId] = Teuchos::ScalarTraits::rmax(); - else (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE+MT_ONE) / minEigenVal; - } - }//end aggId loop + for (size_t i = 0; i < agg_qualities->getLocalLength(); ++i) { + if (data[i] > good_agg_thresh) { + num_bad_aggs++; + mean_bad_agg += data[i]; + } else { + mean_good_agg += data[i]; + } + worst_agg = std::max(worst_agg, data[i]); } - template - void AggregateQualityEstimateFactory::OutputAggQualities(const Level& level, RCP> agg_qualities) const { - - ParameterList pL = GetParameterList(); + if (num_bad_aggs > 0) mean_bad_agg /= num_bad_aggs; + mean_good_agg /= agg_qualities->getLocalLength() - num_bad_aggs; - magnitudeType good_agg_thresh = Teuchos::as(pL.get("aggregate qualities: good aggregate threshold")); - using MT = magnitudeType; - - ArrayRCP data = agg_qualities->getData(0); - - LO num_bad_aggs = 0; - MT worst_agg = 0.0; + if (num_bad_aggs == 0) { + GetOStream(Statistics1) << "All aggregates passed the quality measure. Worst aggregate had quality " << worst_agg << ". 
Mean aggregate quality " << mean_good_agg << "." << std::endl; + } else { + GetOStream(Statistics1) << num_bad_aggs << " out of " << agg_qualities->getLocalLength() << " did not pass the quality measure. Worst aggregate had quality " << worst_agg << ". " + << "Mean bad aggregate quality " << mean_bad_agg << ". Mean good aggregate quality " << mean_good_agg << "." << std::endl; + } - MT mean_bad_agg = 0.0; - MT mean_good_agg = 0.0; + if (pL.get("aggregate qualities: file output")) { + std::string filename = pL.get("aggregate qualities: file base") + "." + std::to_string(level.GetLevelID()); + Xpetra::IO::Write(filename, *agg_qualities); + } + { + const auto n = size_t(agg_qualities->getLocalLength()); - for (size_t i=0;igetLocalLength();++i) { + std::vector tmp; + tmp.reserve(n); - if (data[i] > good_agg_thresh) { - num_bad_aggs++; - mean_bad_agg += data[i]; - } - else { - mean_good_agg += data[i]; - } - worst_agg = std::max(worst_agg, data[i]); + for (size_t i = 0; i < n; ++i) { + tmp.push_back(data[i]); } + std::sort(tmp.begin(), tmp.end()); - if (num_bad_aggs > 0) mean_bad_agg /= num_bad_aggs; - mean_good_agg /= agg_qualities->getLocalLength() - num_bad_aggs; + Teuchos::ArrayView percents = pL.get>("aggregate qualities: percentiles")(); - if (num_bad_aggs == 0) { - GetOStream(Statistics1) << "All aggregates passed the quality measure. Worst aggregate had quality " << worst_agg << ". Mean aggregate quality " << mean_good_agg << "." << std::endl; - } else { - GetOStream(Statistics1) << num_bad_aggs << " out of " << agg_qualities->getLocalLength() << " did not pass the quality measure. Worst aggregate had quality " << worst_agg << ". " - << "Mean bad aggregate quality " << mean_bad_agg << ". Mean good aggregate quality " << mean_good_agg << "." 
<< std::endl; + GetOStream(Statistics1) << "AGG QUALITY HEADER : | LEVEL | TOTAL |"; + for (auto percent : percents) { + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << 100.0 * percent << "% |"; } - - if (pL.get("aggregate qualities: file output")) { - std::string filename = pL.get("aggregate qualities: file base")+"."+std::to_string(level.GetLevelID()); - Xpetra::IO::Write(filename, *agg_qualities); - } - - { - const auto n = size_t(agg_qualities->getLocalLength()); - - std::vector tmp; - tmp.reserve(n); - - for (size_t i=0; i percents = pL.get >("aggregate qualities: percentiles")(); - - GetOStream(Statistics1) << "AGG QUALITY HEADER : | LEVEL | TOTAL |"; - for (auto percent : percents) { - GetOStream(Statistics1) << std::fixed << std::setprecision(4) <<100.0*percent << "% |"; - } - GetOStream(Statistics1) << std::endl; - - GetOStream(Statistics1) << "AGG QUALITY PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; - for (auto percent : percents) { - size_t i = size_t(n*percent); - i = i < n ? i : n-1u; - i = i > 0u ? i : 0u; - GetOStream(Statistics1) << std::fixed < 0u ? 
i : 0u; + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << tmp[i] << " |"; } + GetOStream(Statistics1) << std::endl; } - - +} template - void AggregateQualityEstimateFactory::ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const { - - ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; - ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - - // Iterate over each node in the aggregate - auto data = agg_sizes->getDataNonConst(0); - for (LO i=0; i<(LO)aggSizes.size(); i++) - data[i] = aggSizes[i]; +void AggregateQualityEstimateFactory::ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const { + ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; + ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); + + // Iterate over each node in the aggregate + auto data = agg_sizes->getDataNonConst(0); + for (LO i = 0; i < (LO)aggSizes.size(); i++) + data[i] = aggSizes[i]; } - - template - void AggregateQualityEstimateFactory::OutputAggSizes(const Level& level, RCP agg_sizes) const { +void AggregateQualityEstimateFactory::OutputAggSizes(const Level& level, RCP agg_sizes) const { + ParameterList pL = GetParameterList(); + using MT = magnitudeType; - ParameterList pL = GetParameterList(); - using MT = magnitudeType; + ArrayRCP data = agg_sizes->getData(0); - ArrayRCP data = agg_sizes->getData(0); - - - if (pL.get("aggregate qualities: file output")) { - std::string filename = pL.get("aggregate qualities: file base")+".sizes."+std::to_string(level.GetLevelID()); - Xpetra::IO::Write(filename, *agg_sizes); - } + if (pL.get("aggregate qualities: file output")) { + std::string filename = pL.get("aggregate qualities: file base") + ".sizes." 
+ std::to_string(level.GetLevelID()); + Xpetra::IO::Write(filename, *agg_sizes); + } - { - size_t n = (size_t)agg_sizes->getLocalLength(); + { + size_t n = (size_t)agg_sizes->getLocalLength(); - std::vector tmp; - tmp.reserve(n); + std::vector tmp; + tmp.reserve(n); - for (size_t i=0; i(data[i])); - } + for (size_t i = 0; i < n; ++i) { + tmp.push_back(Teuchos::as(data[i])); + } - std::sort(tmp.begin(), tmp.end()); + std::sort(tmp.begin(), tmp.end()); - Teuchos::ArrayView percents = pL.get >("aggregate qualities: percentiles")(); - - GetOStream(Statistics1) << "AGG SIZE HEADER : | LEVEL | TOTAL |"; - for (auto percent : percents) { - GetOStream(Statistics1) << std::fixed << std::setprecision(4) <<100.0*percent << "% |"; - } - GetOStream(Statistics1) << std::endl; - - GetOStream(Statistics1) << "AGG SIZE PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; - for (auto percent : percents) { - size_t i = size_t(n*percent); - i = i < n ? i : n-1u; - i = i > 0u ? i : 0u; - GetOStream(Statistics1) << std::fixed < percents = pL.get>("aggregate qualities: percentiles")(); + GetOStream(Statistics1) << "AGG SIZE HEADER : | LEVEL | TOTAL |"; + for (auto percent : percents) { + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << 100.0 * percent << "% |"; + } + GetOStream(Statistics1) << std::endl; + + GetOStream(Statistics1) << "AGG SIZE PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; + for (auto percent : percents) { + size_t i = size_t(n * percent); + i = i < n ? i : n - 1u; + i = i > 0u ? 
i : 0u; + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << tmp[i] << " |"; } + GetOStream(Statistics1) << std::endl; } +} +} // namespace MueLu - -} // namespace MueLu - -#endif // MUELU_AGGREGATEQUALITYESTIMATE_DEF_HPP +#endif // MUELU_AGGREGATEQUALITYESTIMATE_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp index 06c3a5ed6917..cb5ad7d07c98 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp @@ -86,73 +86,72 @@ namespace MueLu { ----------|--------------|------------ | Coordinates | BlockedCoordinatesTransferFactory | coarse level coordinates (unified) */ - template - class BlockedCoordinatesTransferFactory : public TwoLevelFactoryBase { +template +class BlockedCoordinatesTransferFactory : public TwoLevelFactoryBase { #undef MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - /*! @brief Constructor. + /*! @brief Constructor. - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - BlockedCoordinatesTransferFactory() { } + The operator associated with projectionName will be applied to the MultiVector associated with + vectorName. + */ + BlockedCoordinatesTransferFactory() {} - //! Destructor. - virtual ~BlockedCoordinatesTransferFactory() { } + //! Destructor. 
+ virtual ~BlockedCoordinatesTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - //@{ - /*! @brief Add (sub) coords factory in the end of list of factories in BlockedCoordinatesTransferFactory. + //@{ + /*! @brief Add (sub) coords factory in the end of list of factories in BlockedCoordinatesTransferFactory. - */ - void AddFactory(const RCP& factory); + */ + void AddFactory(const RCP &factory); + //! Returns number of sub factories. + size_t NumFactories() const { return subFactories_.size(); } - //! Returns number of sub factories. - size_t NumFactories() const { return subFactories_.size(); } + //@} + private: + //! list of user-defined sub Factories + std::vector > subFactories_; - //@} - private: - //! 
list of user-defined sub Factories - std::vector > subFactories_; +}; // class BlockedCoordinatesTransferFactory - }; // class BlockedCoordinatesTransferFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT -#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp index e83e92696def..2c8150b4a969 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp @@ -58,110 +58,108 @@ namespace MueLu { - template - RCP BlockedCoordinatesTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - return validParamList; +template +RCP BlockedCoordinatesTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + return validParamList; +} + +template +void BlockedCoordinatesTransferFactory::DeclareInput(Level& /* fineLevel */, Level& coarseLevel) const { + Input(coarseLevel, "CoarseMap"); + + // Make sure the Level knows I need these sub-Factories + const size_t numSubFactories = NumFactories(); + for (size_t i = 0; i < numSubFactories; i++) { + const RCP& myFactory = subFactories_[i]; + coarseLevel.DeclareInput("Coordinates", myFactory.getRawPtr(), this); } - template - void BlockedCoordinatesTransferFactory::DeclareInput(Level& /* fineLevel */, Level& 
coarseLevel) const { - Input(coarseLevel, "CoarseMap"); - - // Make sure the Level knows I need these sub-Factories - const size_t numSubFactories = NumFactories(); - for(size_t i=0; i& myFactory = subFactories_[i]; - coarseLevel.DeclareInput("Coordinates", myFactory.getRawPtr(), this); - } - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = subFactories_.begin(); it != subFactories_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - } - - template - void BlockedCoordinatesTransferFactory::Build(Level & /* fineLevel */, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - - typedef Xpetra::MultiVector::coordinateType,LO,GO,NO> dMV; - typedef Xpetra::BlockedMultiVector::coordinateType,LO,GO,NO> dBV; + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = subFactories_.begin(); it != subFactories_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); +} - GetOStream(Runtime0) << "Transferring (blocked) coordinates" << std::endl; +template +void BlockedCoordinatesTransferFactory::Build(Level& /* fineLevel */, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - const size_t numSubFactories = NumFactories(); - std::vector > subBlockMaps(numSubFactories); - std::vector > subBlockCoords(numSubFactories); + typedef Xpetra::MultiVector::coordinateType, LO, GO, NO> dMV; + typedef Xpetra::BlockedMultiVector::coordinateType, LO, GO, NO> dBV; - if (coarseLevel.IsAvailable("Coordinates", this)) { - GetOStream(Runtime0) << "Reusing coordinates" << std::endl; - return; - } + GetOStream(Runtime0) << "Transferring (blocked) coordinates" << std::endl; - // Get components - for(size_t i=0; i& myFactory = subFactories_[i]; - myFactory->CallBuild(coarseLevel); - subBlockCoords[i] = coarseLevel.Get >("Coordinates", myFactory.get()); - subBlockMaps[i] = subBlockCoords[i]->getMap(); - } + const size_t numSubFactories = NumFactories(); + 
std::vector > subBlockMaps(numSubFactories); + std::vector > subBlockCoords(numSubFactories); - // Blocked Map - RCP coarseCoordMapBlocked; + if (coarseLevel.IsAvailable("Coordinates", this)) { + GetOStream(Runtime0) << "Reusing coordinates" << std::endl; + return; + } - { - // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac - // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to - // logical blocks in the matrix - RCP coarseMap = Get< RCP >(coarseLevel, "CoarseMap"); - bool thyraMode = coarseMap->getThyraMode(); + // Get components + for (size_t i = 0; i < numSubFactories; i++) { + GetOStream(Runtime1) << "Generating Coordinates for block " << i << "/" << numSubFactories << std::endl; + const RCP& myFactory = subFactories_[i]; + myFactory->CallBuild(coarseLevel); + subBlockCoords[i] = coarseLevel.Get >("Coordinates", myFactory.get()); + subBlockMaps[i] = subBlockCoords[i]->getMap(); + } - ArrayView elementAList = coarseMap->getFullMap()->getLocalElementList(); + // Blocked Map + RCP coarseCoordMapBlocked; - LO blkSize = 1; - if (rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) != Teuchos::null) - blkSize = rcp_dynamic_cast(coarseMap->getMap(0, thyraMode))->getFixedBlockSize(); + { + // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac + // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to + // logical blocks in the matrix + RCP coarseMap = Get >(coarseLevel, "CoarseMap"); + bool thyraMode = coarseMap->getThyraMode(); - for(size_t i=1; i(coarseMap->getMap(i, thyraMode)) != Teuchos::null) - otherBlkSize = rcp_dynamic_cast(coarseMap->getMap(i, thyraMode))->getFixedBlockSize(); - TEUCHOS_TEST_FOR_EXCEPTION(otherBlkSize != blkSize, Exceptions::RuntimeError, "BlockedCoordinatesTransferFactory: Subblocks have different Block sizes. 
This is not yet supported."); - } + ArrayView elementAList = coarseMap->getFullMap()->getLocalElementList(); - GO indexBase = coarseMap->getFullMap()->getIndexBase(); - size_t numElements = elementAList.size() / blkSize; - Array elementList(numElements); + LO blkSize = 1; + if (rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) != Teuchos::null) + blkSize = rcp_dynamic_cast(coarseMap->getMap(0, thyraMode))->getFixedBlockSize(); - // Amalgamate the map - for (LO i = 0; i < Teuchos::as(numElements); i++) - elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; + for (size_t i = 1; i < numSubFactories; i++) { + LO otherBlkSize = 1; + if (rcp_dynamic_cast(coarseMap->getMap(i, thyraMode)) != Teuchos::null) + otherBlkSize = rcp_dynamic_cast(coarseMap->getMap(i, thyraMode))->getFixedBlockSize(); + TEUCHOS_TEST_FOR_EXCEPTION(otherBlkSize != blkSize, Exceptions::RuntimeError, "BlockedCoordinatesTransferFactory: Subblocks have different Block sizes. This is not yet supported."); + } - RCP coarseCoordMap = MapFactory::Build(coarseMap->getFullMap()->lib(), - Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getFullMap()->getComm()); + GO indexBase = coarseMap->getFullMap()->getIndexBase(); + size_t numElements = elementAList.size() / blkSize; + Array elementList(numElements); - coarseCoordMapBlocked = rcp(new BlockedMap(coarseCoordMap, subBlockMaps, thyraMode)); - } + // Amalgamate the map + for (LO i = 0; i < Teuchos::as(numElements); i++) + elementList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase; - // Build blocked coordinates vector - RCP bcoarseCoords = rcp(new dBV(coarseCoordMapBlocked,subBlockCoords)); + RCP coarseCoordMap = MapFactory::Build(coarseMap->getFullMap()->lib(), + Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getFullMap()->getComm()); - // Turn the blocked coordinates vector into an unblocked one - RCP coarseCoords = bcoarseCoords->Merge(); - Set >(coarseLevel, "Coordinates", 
coarseCoords); + coarseCoordMapBlocked = rcp(new BlockedMap(coarseCoordMap, subBlockMaps, thyraMode)); } - template - void BlockedCoordinatesTransferFactory::AddFactory(const RCP& factory) { - subFactories_.push_back(factory); - } + // Build blocked coordinates vector + RCP bcoarseCoords = rcp(new dBV(coarseCoordMapBlocked, subBlockCoords)); + // Turn the blocked coordinates vector into an unblocked one + RCP coarseCoords = bcoarseCoords->Merge(); + Set >(coarseLevel, "Coordinates", coarseCoords); +} +template +void BlockedCoordinatesTransferFactory::AddFactory(const RCP& factory) { + subFactories_.push_back(factory); +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DEF_HPP +#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp index 63449a12d82b..9460a32a623e 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp @@ -60,95 +60,94 @@ #include "MueLu_TwoLevelFactoryBase.hpp" namespace MueLu { - /*! - @class BlockedRAPFactory - @brief Factory for building coarse matrices. - */ - template - class BlockedRAPFactory : public TwoLevelFactoryBase { +/*! + @class BlockedRAPFactory + @brief Factory for building coarse matrices. +*/ +template +class BlockedRAPFactory : public TwoLevelFactoryBase { #undef MUELU_BLOCKEDRAPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - BlockedRAPFactory(); + public: + //! @name Constructors/Destructors. + //@{ - virtual ~BlockedRAPFactory() = default; - //@} + BlockedRAPFactory(); - //! @name Input - //@{ + virtual ~BlockedRAPFactory() = default; + //@} - RCP GetValidParameterList() const override; + //! 
@name Input + //@{ - void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; + RCP GetValidParameterList() const override; - //@} + void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; - //! @name Build methods. - //@{ - void Build(Level &fineLevel, Level &coarseLevel) const override; - //@} + //@} - //! @name Handling of user-defined transfer factories - //@{ + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const override; + //@} - //! Indicate that zero entries on the diagonal of Ac shall be repaired (i.e. if A(i,i) == 0.0 set A(i,i) = 1.0) - void SetRepairZeroDiagonal(bool const &repair) { - repairZeroDiagonals_ = repair; - if(repair) checkAc_ = true; // make sure that plausibility check is performed. Otherwise SetRepairZeroDiagonal(true) has no effect. - } + //! @name Handling of user-defined transfer factories + //@{ - //! Indicate that a simple plausibility check shall be done for Ac after building RAP - void SetPlausibilityCheck(bool const &check) { - checkAc_ = check; - } + //! Indicate that zero entries on the diagonal of Ac shall be repaired (i.e. if A(i,i) == 0.0 set A(i,i) = 1.0) + void SetRepairZeroDiagonal(bool const &repair) { + repairZeroDiagonals_ = repair; + if (repair) checkAc_ = true; // make sure that plausibility check is performed. Otherwise SetRepairZeroDiagonal(true) has no effect. + } - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //! Indicate that a simple plausibility check shall be done for Ac after building RAP + void SetPlausibilityCheck(bool const &check) { + checkAc_ = check; + } - Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to - the next coarser level. - */ - void AddTransferFactory(const RCP& factory); + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. 
- // TODO add a function to remove a specific transfer factory? + Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to + the next coarser level. + */ + void AddTransferFactory(const RCP &factory); - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + // TODO add a function to remove a specific transfer factory? - //@} + //! Returns number of transfer factories. + size_t NumTransferFactories() const { return transferFacts_.size(); } - private: + //@} - //! @name internal plausibility check methods - //! checks main diagonal entries of (0,0) block. Does not affect entries in (1,1) block! - static void CheckMainDiagonal(RCP & bAc, bool repairZeroDiagonals = false); + private: + //! @name internal plausibility check methods + //! checks main diagonal entries of (0,0) block. Does not affect entries in (1,1) block! + static void CheckMainDiagonal(RCP &bAc, bool repairZeroDiagonals = false); - //! If true, perform a basic plausibility check on Ac (default = false) - //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true - bool checkAc_; + //! If true, perform a basic plausibility check on Ac (default = false) + //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true + bool checkAc_; - //! If true, the CheckMainDiagonal routine automatically repairs zero entries on main diagonal (default = false) - //! i.e. if A(i,i) == 0.0 set A(i,i) = 1.0 - //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true - bool repairZeroDiagonals_; + //! If true, the CheckMainDiagonal routine automatically repairs zero entries on main diagonal (default = false) + //! i.e. if A(i,i) == 0.0 set A(i,i) = 1.0 + //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true + bool repairZeroDiagonals_; - //@{ + //@{ - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! 
list of user-defined transfer Factories + std::vector > transferFacts_; - //@} + //@} - }; //class BlockedRAPFactory +}; // class BlockedRAPFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_BLOCKEDRAPFACTORY_SHORT -#endif // MUELU_BLOCKEDRAPFACTORY_DECL_HPP +#endif // MUELU_BLOCKEDRAPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp index 7817372296cb..400c00441afd 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp @@ -60,174 +60,169 @@ namespace MueLu { - template - BlockedRAPFactory::BlockedRAPFactory() - : checkAc_(false), repairZeroDiagonals_(false) - { } +template +BlockedRAPFactory::BlockedRAPFactory() + : checkAc_(false) + , repairZeroDiagonals_(false) {} - template - RCP BlockedRAPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP BlockedRAPFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("P", null, "Prolongator factory"); - validParamList->set< RCP >("R", null, "Restrictor factory"); + SET_VALID_ENTRY("transpose: use implicit"); +#undef SET_VALID_ENTRY + validParamList->set >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); + validParamList->set >("P", null, "Prolongator factory"); + validParamList->set >("R", null, "Restrictor factory"); - return validParamList; - } - - template - void BlockedRAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); - if 
(pL.get("transpose: use implicit") == false) - Input(coarseLevel, "R"); + return validParamList; +} - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - } +template +void BlockedRAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + if (pL.get("transpose: use implicit") == false) + Input(coarseLevel, "R"); - template - void BlockedRAPFactory::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!! - FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel); + Input(fineLevel, "A"); + Input(coarseLevel, "P"); - const ParameterList& pL = GetParameterList(); + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); +} - RCP A = Get< RCP >(fineLevel, "A"); - RCP P = Get< RCP >(coarseLevel, "P"); +template +void BlockedRAPFactory::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const!! 
+ FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel); + const ParameterList &pL = GetParameterList(); - RCP bA = rcp_dynamic_cast(A); - RCP bP = rcp_dynamic_cast(P); - TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices A and P must be of type BlockedCrsMatrix."); + RCP A = Get >(fineLevel, "A"); + RCP P = Get >(coarseLevel, "P"); + RCP bA = rcp_dynamic_cast(A); + RCP bP = rcp_dynamic_cast(P); + TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices A and P must be of type BlockedCrsMatrix."); - RCP bAP; - RCP bAc; - { - SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + RCP bAP; + RCP bAc; + { + SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); - // Triple matrix product for BlockedCrsMatrixClass - TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast, + // Triple matrix product for BlockedCrsMatrixClass + TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast, "Block matrix dimensions do not match: " - "A is " << bA->Rows() << "x" << bA->Cols() << - "P is " << bP->Rows() << "x" << bP->Cols()); + "A is " + << bA->Rows() << "x" << bA->Cols() << "P is " << bP->Rows() << "x" << bP->Cols()); - bAP = MatrixMatrix::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true); - } - - - // If we do not modify matrix later, allow optimization of storage. - // This is necessary for new faster Epetra MM kernels. 
- bool doOptimizeStorage = !checkAc_; + bAP = MatrixMatrix::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true); + } - const bool doTranspose = true; - const bool doFillComplete = true; - if (pL.get("transpose: use implicit") == true) { - SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); - bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); + // If we do not modify matrix later, allow optimization of storage. + // This is necessary for new faster Epetra MM kernels. + bool doOptimizeStorage = !checkAc_; - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - RCP bR = rcp_dynamic_cast(R); - TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix."); + const bool doTranspose = true; + const bool doFillComplete = true; + if (pL.get("transpose: use implicit") == true) { + SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); + bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); - TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast, - "Block matrix dimensions do not match: " - "R is " << bR->Rows() << "x" << bR->Cols() << - "A is " << bA->Rows() << "x" << bA->Cols()); + } else { + RCP R = Get >(coarseLevel, "R"); + RCP bR = rcp_dynamic_cast(R); + TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix."); - SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); - } + TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast, + "Block matrix dimensions do not match: " + "R is " + << bR->Rows() << "x" << bR->Cols() << "A is " << bA->Rows() << 
"x" << bA->Cols()); + SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); + bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); + } - if (checkAc_) - CheckMainDiagonal(bAc); + if (checkAc_) + CheckMainDiagonal(bAc); - GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)"); + GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)"); - Set >(coarseLevel, "A", bAc); + Set >(coarseLevel, "A", bAc); - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m1(*this, "Projections", coarseLevel); + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m1(*this, "Projections", coarseLevel); - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; + // call Build of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + RCP fac = *it; - GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl; + GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); + fac->CallBuild(coarseLevel); - // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid - // of dangling data for CoordinatesTransferFactory - coarseLevel.Release(*fac); - } + // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid + // of dangling data for CoordinatesTransferFactory + coarseLevel.Release(*fac); } } +} + +template +void BlockedRAPFactory::CheckMainDiagonal(RCP &bAc, bool repairZeroDiagonals) { + RCP c00 = bAc->getMatrix(0, 0); + RCP Aout = MatrixFactory::Build(c00->getRowMap(), c00->getGlobalMaxNumRowEntries()); + + RCP diagVec = 
VectorFactory::Build(c00->getRowMap()); + c00->getLocalDiagCopy(*diagVec); + ArrayRCP diagVal = diagVec->getDataNonConst(0); + + // loop over local rows + for (size_t row = 0; row < c00->getLocalNumRows(); row++) { + // get global row id + GO grid = c00->getRowMap()->getGlobalElement(row); // global row id + + ArrayView indices; + ArrayView vals; + c00->getLocalRowView(row, indices, vals); + + // just copy all values in output + ArrayRCP indout(indices.size(), Teuchos::OrdinalTraits::zero()); + ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); + + // just copy values + for (size_t i = 0; i < as(indices.size()); i++) { + GO gcid = c00->getColMap()->getGlobalElement(indices[i]); // LID -> GID (column) + indout[i] = gcid; + valout[i] = vals[i]; + } - - template - void BlockedRAPFactory::CheckMainDiagonal(RCP & bAc, bool repairZeroDiagonals) { - RCP c00 = bAc->getMatrix(0, 0); - RCP Aout = MatrixFactory::Build(c00->getRowMap(), c00->getGlobalMaxNumRowEntries()); - - RCP diagVec = VectorFactory::Build(c00->getRowMap()); - c00->getLocalDiagCopy(*diagVec); - ArrayRCP diagVal = diagVec->getDataNonConst(0); - - // loop over local rows - for (size_t row = 0; row < c00->getLocalNumRows(); row++) { - // get global row id - GO grid = c00->getRowMap()->getGlobalElement(row); // global row id - - ArrayView indices; - ArrayView vals; - c00->getLocalRowView(row, indices, vals); - - // just copy all values in output - ArrayRCP indout(indices.size(), Teuchos::OrdinalTraits::zero()); - ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - - // just copy values - for (size_t i = 0; i < as(indices.size()); i++) { - GO gcid = c00->getColMap()->getGlobalElement(indices[i]); // LID -> GID (column) - indout [i] = gcid; - valout [i] = vals[i]; - } - - Aout->insertGlobalValues(grid, indout.view(0, indout.size()), valout.view(0, valout.size())); - if (diagVal[row] == Teuchos::ScalarTraits::zero() && repairZeroDiagonals) { - // always overwrite diagonal entry - 
Aout->insertGlobalValues(grid, Teuchos::tuple(grid), Teuchos::tuple(1.0)); - } + Aout->insertGlobalValues(grid, indout.view(0, indout.size()), valout.view(0, valout.size())); + if (diagVal[row] == Teuchos::ScalarTraits::zero() && repairZeroDiagonals) { + // always overwrite diagonal entry + Aout->insertGlobalValues(grid, Teuchos::tuple(grid), Teuchos::tuple(1.0)); } + } - Aout->fillComplete(c00->getDomainMap(), c00->getRangeMap()); + Aout->fillComplete(c00->getDomainMap(), c00->getRangeMap()); - bAc->setMatrix(0, 0, Aout); - } + bAc->setMatrix(0, 0, Aout); +} - template - void BlockedRAPFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, - "Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. " - "(Note: you can remove this exception if there's a good reason for)"); - transferFacts_.push_back(factory); - } +template +void BlockedRAPFactory::AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, + "Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. 
" + "(Note: you can remove this exception if there's a good reason for)"); + transferFacts_.push_back(factory); +} -} //namespace MueLu +} // namespace MueLu #define MUELU_BLOCKEDRAPFACTORY_SHORT -#endif // MUELU_BLOCKEDRAPFACTORY_DEF_HPP +#endif // MUELU_BLOCKEDRAPFACTORY_DEF_HPP // TODO add plausibility check // TODO add CheckMainDiagonal for Blocked operator diff --git a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp index 2ee51c482c54..d9046da451e6 100644 --- a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp @@ -94,68 +94,67 @@ namespace MueLu { ----------|--------------|------------ | Coordinates | CoordinatesTransferFactory | coarse level coordinates */ - template - class CoordinatesTransferFactory : public TwoLevelFactoryBase { - public: - typedef Scalar scalar_type; - typedef LocalOrdinal local_ordinal_type; - typedef GlobalOrdinal global_ordinal_type; - typedef typename Node::device_type DeviceType; - typedef typename DeviceType::execution_space execution_space; - - private: +template +class CoordinatesTransferFactory : public TwoLevelFactoryBase { + public: + typedef Scalar scalar_type; + typedef LocalOrdinal local_ordinal_type; + typedef GlobalOrdinal global_ordinal_type; + typedef typename Node::device_type DeviceType; + typedef typename DeviceType::execution_space execution_space; + + private: #undef MUELU_COORDINATESTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - /*! @brief Constructor. + /*! @brief Constructor. - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. 
- The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - CoordinatesTransferFactory() { } + The operator associated with projectionName will be applied to the MultiVector associated with + vectorName. + */ + CoordinatesTransferFactory() {} - //! Destructor. - virtual ~CoordinatesTransferFactory() { } + //! Destructor. + virtual ~CoordinatesTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. 
+ void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - private: + private: +}; // class CoordinatesTransferFactory - }; // class CoordinatesTransferFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_COORDINATESTRANSFERFACTORY_SHORT -#endif // MUELU_COORDINATESTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_COORDINATESTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp index b8f0efbbdb2f..dc6bd4c6c3ec 100644 --- a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp @@ -60,227 +60,224 @@ namespace MueLu { - template - RCP CoordinatesTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("Aggregates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - validParamList->set ("structured aggregation", false, "Flag specifying that the geometric data is transferred for StructuredAggregationFactory"); - validParamList->set ("aggregation coupled", false, "Flag specifying if the aggregation algorithm was used in coupled mode."); - validParamList->set ("Geometric", false, "Flag specifying that the coordinates are transferred for GeneralGeometricPFactory"); - validParamList->set >("coarseCoordinates", Teuchos::null, "Factory for coarse coordinates generation"); - validParamList->set >("gCoarseNodesPerDim", Teuchos::null, "Factory providing the global number of nodes per spatial dimensions of the mesh"); - validParamList->set >("lCoarseNodesPerDim", Teuchos::null, "Factory providing the local number of nodes per spatial dimensions of the mesh"); - validParamList->set >("numDimensions" , 
Teuchos::null, "Factory providing the number of spatial dimensions of the mesh"); - validParamList->set ("write start", -1, "first level at which coordinates should be written to file"); - validParamList->set ("write end", -1, "last level at which coordinates should be written to file"); - validParamList->set ("hybrid aggregation", false, "Flag specifying that hybrid aggregation data is transfered for HybridAggregationFactory"); - validParamList->set >("aggregationRegionTypeCoarse", Teuchos::null, "Factory indicating what aggregation type is to be used on the coarse level of the region"); - validParamList->set ("interface aggregation", false, "Flag specifying that interface aggregation data is transfered for HybridAggregationFactory"); - validParamList->set >("coarseInterfacesDimensions", Teuchos::null, "Factory providing coarseInterfacesDimensions"); - validParamList->set >("nodeOnCoarseInterface", Teuchos::null, "Factory providing nodeOnCoarseInterface"); - - - return validParamList; - } - - template - void CoordinatesTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - static bool isAvailableCoords = false; - - const ParameterList& pL = GetParameterList(); - if(pL.get("structured aggregation") == true) { - if(pL.get("aggregation coupled") == true) { - Input(fineLevel, "gCoarseNodesPerDim"); - } - Input(fineLevel, "lCoarseNodesPerDim"); - Input(fineLevel, "numDimensions"); - } else if(pL.get("Geometric") == true) { - Input(coarseLevel, "coarseCoordinates"); - Input(coarseLevel, "gCoarseNodesPerDim"); - Input(coarseLevel, "lCoarseNodesPerDim"); - } else if(pL.get("hybrid aggregation") == true) { - Input(fineLevel, "aggregationRegionTypeCoarse"); - Input(fineLevel, "lCoarseNodesPerDim"); - Input(fineLevel, "numDimensions"); - if(pL.get("interface aggregation") == true) { - Input(fineLevel, "coarseInterfacesDimensions"); - Input(fineLevel, "nodeOnCoarseInterface"); - } - } else { - if (coarseLevel.GetRequestMode() == Level::REQUEST) - 
isAvailableCoords = coarseLevel.IsAvailable("Coordinates", this); - - if (isAvailableCoords == false) { - Input(fineLevel, "Coordinates"); - Input(fineLevel, "Aggregates"); - Input(fineLevel, "CoarseMap"); - } +template +RCP CoordinatesTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set >("Aggregates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + validParamList->set("structured aggregation", false, "Flag specifying that the geometric data is transferred for StructuredAggregationFactory"); + validParamList->set("aggregation coupled", false, "Flag specifying if the aggregation algorithm was used in coupled mode."); + validParamList->set("Geometric", false, "Flag specifying that the coordinates are transferred for GeneralGeometricPFactory"); + validParamList->set >("coarseCoordinates", Teuchos::null, "Factory for coarse coordinates generation"); + validParamList->set >("gCoarseNodesPerDim", Teuchos::null, "Factory providing the global number of nodes per spatial dimensions of the mesh"); + validParamList->set >("lCoarseNodesPerDim", Teuchos::null, "Factory providing the local number of nodes per spatial dimensions of the mesh"); + validParamList->set >("numDimensions", Teuchos::null, "Factory providing the number of spatial dimensions of the mesh"); + validParamList->set("write start", -1, "first level at which coordinates should be written to file"); + validParamList->set("write end", -1, "last level at which coordinates should be written to file"); + validParamList->set("hybrid aggregation", false, "Flag specifying that hybrid aggregation data is transfered for HybridAggregationFactory"); + validParamList->set >("aggregationRegionTypeCoarse", Teuchos::null, "Factory indicating what aggregation type is to 
be used on the coarse level of the region"); + validParamList->set("interface aggregation", false, "Flag specifying that interface aggregation data is transfered for HybridAggregationFactory"); + validParamList->set >("coarseInterfacesDimensions", Teuchos::null, "Factory providing coarseInterfacesDimensions"); + validParamList->set >("nodeOnCoarseInterface", Teuchos::null, "Factory providing nodeOnCoarseInterface"); + + return validParamList; +} + +template +void CoordinatesTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + static bool isAvailableCoords = false; + + const ParameterList& pL = GetParameterList(); + if (pL.get("structured aggregation") == true) { + if (pL.get("aggregation coupled") == true) { + Input(fineLevel, "gCoarseNodesPerDim"); + } + Input(fineLevel, "lCoarseNodesPerDim"); + Input(fineLevel, "numDimensions"); + } else if (pL.get("Geometric") == true) { + Input(coarseLevel, "coarseCoordinates"); + Input(coarseLevel, "gCoarseNodesPerDim"); + Input(coarseLevel, "lCoarseNodesPerDim"); + } else if (pL.get("hybrid aggregation") == true) { + Input(fineLevel, "aggregationRegionTypeCoarse"); + Input(fineLevel, "lCoarseNodesPerDim"); + Input(fineLevel, "numDimensions"); + if (pL.get("interface aggregation") == true) { + Input(fineLevel, "coarseInterfacesDimensions"); + Input(fineLevel, "nodeOnCoarseInterface"); + } + } else { + if (coarseLevel.GetRequestMode() == Level::REQUEST) + isAvailableCoords = coarseLevel.IsAvailable("Coordinates", this); + + if (isAvailableCoords == false) { + Input(fineLevel, "Coordinates"); + Input(fineLevel, "Aggregates"); + Input(fineLevel, "CoarseMap"); } } +} - template - void CoordinatesTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); +template +void CoordinatesTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - using xdMV = 
Xpetra::MultiVector::magnitudeType,LO,GO,NO>; + using xdMV = Xpetra::MultiVector::magnitudeType, LO, GO, NO>; - GetOStream(Runtime0) << "Transferring coordinates" << std::endl; + GetOStream(Runtime0) << "Transferring coordinates" << std::endl; - int numDimensions; - RCP coarseCoords; - RCP fineCoords; - Array gCoarseNodesPerDir; - Array lCoarseNodesPerDir; + int numDimensions; + RCP coarseCoords; + RCP fineCoords; + Array gCoarseNodesPerDir; + Array lCoarseNodesPerDir; - const ParameterList& pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - if(pL.get("hybrid aggregation") == true) { - std::string regionType = Get(fineLevel,"aggregationRegionTypeCoarse"); - numDimensions = Get(fineLevel, "numDimensions"); - lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); - Set(coarseLevel, "aggregationRegionType", regionType); - Set (coarseLevel, "numDimensions", numDimensions); - Set > (coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + if (pL.get("hybrid aggregation") == true) { + std::string regionType = Get(fineLevel, "aggregationRegionTypeCoarse"); + numDimensions = Get(fineLevel, "numDimensions"); + lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); + Set(coarseLevel, "aggregationRegionType", regionType); + Set(coarseLevel, "numDimensions", numDimensions); + Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - if((pL.get("interface aggregation") == true) && (regionType == "uncoupled")) { - Array coarseInterfacesDimensions = Get >(fineLevel, "coarseInterfacesDimensions"); - Array nodeOnCoarseInterface = Get >(fineLevel, "nodeOnCoarseInterface"); - Set >(coarseLevel, "interfacesDimensions", coarseInterfacesDimensions); - Set >(coarseLevel, "nodeOnInterface", nodeOnCoarseInterface); - } + if ((pL.get("interface aggregation") == true) && (regionType == "uncoupled")) { + Array coarseInterfacesDimensions = Get >(fineLevel, "coarseInterfacesDimensions"); + Array nodeOnCoarseInterface = Get >(fineLevel, "nodeOnCoarseInterface"); + 
Set >(coarseLevel, "interfacesDimensions", coarseInterfacesDimensions); + Set >(coarseLevel, "nodeOnInterface", nodeOnCoarseInterface); + } - } else if(pL.get("structured aggregation") == true) { - if(pL.get("aggregation coupled") == true) { - gCoarseNodesPerDir = Get >(fineLevel, "gCoarseNodesPerDim"); - Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); - } - lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); - Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - numDimensions = Get(fineLevel, "numDimensions"); - Set(coarseLevel, "numDimensions", numDimensions); - - } else if(pL.get("Geometric") == true) { - coarseCoords = Get >(coarseLevel, "coarseCoordinates"); - gCoarseNodesPerDir = Get >(coarseLevel, "gCoarseNodesPerDim"); - lCoarseNodesPerDir = Get >(coarseLevel, "lCoarseNodesPerDim"); + } else if (pL.get("structured aggregation") == true) { + if (pL.get("aggregation coupled") == true) { + gCoarseNodesPerDir = Get >(fineLevel, "gCoarseNodesPerDim"); Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); - Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + } + lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); + Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + numDimensions = Get(fineLevel, "numDimensions"); + Set(coarseLevel, "numDimensions", numDimensions); + + } else if (pL.get("Geometric") == true) { + coarseCoords = Get >(coarseLevel, "coarseCoordinates"); + gCoarseNodesPerDir = Get >(coarseLevel, "gCoarseNodesPerDim"); + lCoarseNodesPerDir = Get >(coarseLevel, "lCoarseNodesPerDim"); + Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); + Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + + Set >(coarseLevel, "Coordinates", coarseCoords); + + } else { + if (coarseLevel.IsAvailable("Coordinates", this)) { + GetOStream(Runtime0) << "Reusing coordinates" << std::endl; + return; + } - Set >(coarseLevel, "Coordinates", coarseCoords); + fineCoords = Get >(fineLevel, "Coordinates"); + RCP coarseMap = Get 
>(fineLevel, "CoarseMap"); + + // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac + // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to + // logical blocks in the matrix + LO blkSize = 1; + if (rcp_dynamic_cast(coarseMap) != Teuchos::null) + blkSize = rcp_dynamic_cast(coarseMap)->getFixedBlockSize(); + + RCP coarseCoordMap; + RCP uniqueMap = fineCoords->getMap(); + if (blkSize > 1) { + // If the block size is greater than one, we need to create a coarse coordinate map + // FIXME: The amalgamation should really be done on device. + GO indexBase = coarseMap->getIndexBase(); + ArrayView elementAList = coarseMap->getLocalElementList(); + size_t numElements = elementAList.size() / blkSize; + Array elementList(numElements); + + // Amalgamate the map + for (LO i = 0; i < Teuchos::as(numElements); i++) + elementList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase; - } else { - if (coarseLevel.IsAvailable("Coordinates", this)) { - GetOStream(Runtime0) << "Reusing coordinates" << std::endl; - return; - } - - fineCoords = Get< RCP >(fineLevel, "Coordinates"); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - - // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac - // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to - // logical blocks in the matrix - LO blkSize = 1; - if (rcp_dynamic_cast(coarseMap) != Teuchos::null) - blkSize = rcp_dynamic_cast(coarseMap)->getFixedBlockSize(); - - RCP coarseCoordMap; - RCP uniqueMap = fineCoords->getMap(); - if(blkSize > 1) { - // If the block size is greater than one, we need to create a coarse coordinate map - // FIXME: The amalgamation should really be done on device. 
- GO indexBase = coarseMap->getIndexBase(); - ArrayView elementAList = coarseMap->getLocalElementList(); - size_t numElements = elementAList.size() / blkSize; - Array elementList(numElements); - - // Amalgamate the map - for (LO i = 0; i < Teuchos::as(numElements); i++) - elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; - - { - SubFactoryMonitor sfm(*this, "MapFactory: coarseCoordMap", fineLevel); - coarseCoordMap = MapFactory ::Build(coarseMap->lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getComm()); - } - } - else { - // If the block size is one, we can just use the coarse map for coordinates - coarseCoordMap = coarseMap; + { + SubFactoryMonitor sfm(*this, "MapFactory: coarseCoordMap", fineLevel); + coarseCoordMap = MapFactory ::Build(coarseMap->lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getComm()); } + } else { + // If the block size is one, we can just use the coarse map for coordinates + coarseCoordMap = coarseMap; + } - // Build the coarseCoords MultiVector - coarseCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors()); - + // Build the coarseCoords MultiVector + coarseCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(coarseCoordMap, fineCoords->getNumVectors()); - RCP aggregates; - bool aggregatesCrossProcessors; - aggregates = Get >(fineLevel, "Aggregates"); - aggregatesCrossProcessors = aggregates->AggregatesCrossProcessors(); + RCP aggregates; + bool aggregatesCrossProcessors; + aggregates = Get >(fineLevel, "Aggregates"); + aggregatesCrossProcessors = aggregates->AggregatesCrossProcessors(); - // Create overlapped fine coordinates to reduce global communication - RCP ghostedCoords = fineCoords; - if (aggregatesCrossProcessors) { - RCP nonUniqueMap = aggregates->GetMap(); - RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + // Create overlapped fine coordinates to reduce 
global communication + RCP ghostedCoords = fineCoords; + if (aggregatesCrossProcessors) { + RCP nonUniqueMap = aggregates->GetMap(); + RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, fineCoords->getNumVectors()); - ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); - } + ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(nonUniqueMap, fineCoords->getNumVectors()); + ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); + } - // The good news is that this graph has already been constructed for the - // TentativePFactory and was cached in Aggregates. So this is a no-op. - auto aggGraph = aggregates->GetGraph(); - auto numAggs = aggGraph.numRows(); + // The good news is that this graph has already been constructed for the + // TentativePFactory and was cached in Aggregates. So this is a no-op. + auto aggGraph = aggregates->GetGraph(); + auto numAggs = aggGraph.numRows(); - auto fineCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto coarseCoordsView = coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto fineCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto coarseCoordsView = coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll); - // Fill in coarse coordinates - { - SubFactoryMonitor m2(*this, "AverageCoords", coarseLevel); + // Fill in coarse coordinates + { + SubFactoryMonitor m2(*this, "AverageCoords", coarseLevel); - const auto dim = ghostedCoords->getNumVectors(); + const auto dim = ghostedCoords->getNumVectors(); - typename AppendTrait::type fineCoordsRandomView = fineCoordsView; - for (size_t j = 0; j < dim; j++) { - Kokkos::parallel_for("MueLu:CoordinatesTransferF:Build:coord", Kokkos::RangePolicy(0, numAggs), - KOKKOS_LAMBDA(const LO i) { - // A row in this graph represents all node ids in the aggregate - // Therefore, 
averaging is very easy + typename AppendTrait::type fineCoordsRandomView = fineCoordsView; + for (size_t j = 0; j < dim; j++) { + Kokkos::parallel_for( + "MueLu:CoordinatesTransferF:Build:coord", Kokkos::RangePolicy(0, numAggs), + KOKKOS_LAMBDA(const LO i) { + // A row in this graph represents all node ids in the aggregate + // Therefore, averaging is very easy - auto aggregate = aggGraph.rowConst(i); + auto aggregate = aggGraph.rowConst(i); - typename Teuchos::ScalarTraits::magnitudeType sum = 0.0; // do not use Scalar here (Stokhos) - for (size_t colID = 0; colID < static_cast(aggregate.length); colID++) - sum += fineCoordsRandomView(aggregate(colID),j); + typename Teuchos::ScalarTraits::magnitudeType sum = 0.0; // do not use Scalar here (Stokhos) + for (size_t colID = 0; colID < static_cast(aggregate.length); colID++) + sum += fineCoordsRandomView(aggregate(colID), j); - coarseCoordsView(i,j) = sum / aggregate.length; - }); - } + coarseCoordsView(i, j) = sum / aggregate.length; + }); } - - Set >(coarseLevel, "Coordinates", coarseCoords); - } - int writeStart = pL.get("write start"), writeEnd = pL.get("write end"); - if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) { - std::ostringstream buf; - buf << fineLevel.GetLevelID(); - std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; - Xpetra::IO::magnitudeType,LO,GO,NO>::Write(fileName,*fineCoords); - } - if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) { - std::ostringstream buf; - buf << coarseLevel.GetLevelID(); - std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; - Xpetra::IO::magnitudeType,LO,GO,NO>::Write(fileName,*coarseCoords); - } + Set >(coarseLevel, "Coordinates", coarseCoords); + } + + int writeStart = pL.get("write start"), writeEnd = pL.get("write end"); + if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) { + std::ostringstream buf; + buf << 
fineLevel.GetLevelID(); + std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; + Xpetra::IO::magnitudeType, LO, GO, NO>::Write(fileName, *fineCoords); + } + if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) { + std::ostringstream buf; + buf << coarseLevel.GetLevelID(); + std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; + Xpetra::IO::magnitudeType, LO, GO, NO>::Write(fileName, *coarseCoords); } +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_COORDINATESTRANSFER_FACTORY_DEF_HPP +#endif // MUELU_COORDINATESTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp index 75931f90de75..b152ddba9d32 100644 --- a/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp @@ -52,57 +52,57 @@ namespace MueLu { - /*! - @class DemoFactory class. - @brief empty factory for demonstration - */ - - template - class DemoFactory : public SingleLevelFactoryBase { +/*! + @class DemoFactory class. + @brief empty factory for demonstration +*/ + +template +class DemoFactory : public SingleLevelFactoryBase { #undef MUELU_DEMOFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - DemoFactory(); + //! Constructor. + DemoFactory(); - //! Destructor. - virtual ~DemoFactory(); + //! Destructor. + virtual ~DemoFactory(); - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. 
- If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level ¤tLevel) const; + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! Build an object with this factory. + void Build(Level ¤tLevel) const; - //@} + //@} - private: - // TODO add member variables + private: + // TODO add member variables - }; // class DemoFactory +}; // class DemoFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_DEMOFACTORY_SHORT -#endif // MUELU_DEMOFACTORY_DECL_HPP +#endif // MUELU_DEMOFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp b/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp index b390142ca2f8..cc7a327b79f9 100644 --- a/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp @@ -55,25 +55,24 @@ namespace MueLu { - template - DemoFactory::DemoFactory() - { } +template +DemoFactory::DemoFactory() {} - template - DemoFactory::~DemoFactory() {} +template +DemoFactory::~DemoFactory() {} - template - void DemoFactory::DeclareInput(Level &/* currentLevel */) const { - // TODO: declare input for factory - //Input(currentLevel, varName_); - } +template +void DemoFactory::DeclareInput(Level& /* currentLevel */) const { + // TODO: declare input for factory + // Input(currentLevel, varName_); +} - template - void DemoFactory::Build(Level & /* currentLevel */) const { - // TODO: implement factory - } +template +void DemoFactory::Build(Level& /* currentLevel */) const { + // TODO: implement factory +} 
-} // namespace MueLu +} // namespace MueLu #define MUELU_DEMOFACTORY_SHORT -#endif // MUELU_DEMOFACTORY_DEF_HPP +#endif // MUELU_DEMOFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp index ab99f2f9b964..82a467269b53 100644 --- a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp @@ -55,58 +55,57 @@ namespace MueLu { - /*! - @class DropNegativeEntriesFactory class. - @brief Application-specific filtering for A. Can be used in context of graph coarsening and aggregation. - - This factory drops all negative entries (or entries with a magnitude < 0). Only weak positive connections are kept. - Do not use this kind of filtering for regular PDEs unless you have very good reasons. - */ - - template - class DropNegativeEntriesFactory : public SingleLevelFactoryBase { +/*! + @class DropNegativeEntriesFactory class. + @brief Application-specific filtering for A. Can be used in context of graph coarsening and aggregation. + + This factory drops all negative entries (or entries with a magnitude < 0). Only weak positive connections are kept. + Do not use this kind of filtering for regular PDEs unless you have very good reasons. +*/ + +template +class DropNegativeEntriesFactory : public SingleLevelFactoryBase { #undef MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - DropNegativeEntriesFactory() { } + DropNegativeEntriesFactory() {} - //! Destructor. - virtual ~DropNegativeEntriesFactory() { } + //! Destructor. + virtual ~DropNegativeEntriesFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! 
Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! + @brief Build method. - Builds filtered matrix and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + Builds filtered matrix and returns it in currentLevel. + */ + void Build(Level& currentLevel) const; - //@} + //@} - }; //class DropNegativeEntriesFactory +}; // class DropNegativeEntriesFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT -#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DECL_HPP +#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp index 1bf7b0bfd4c8..4e4882ad5a0d 100644 --- a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp @@ -60,76 +60,76 @@ namespace MueLu { - template - RCP DropNegativeEntriesFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP DropNegativeEntriesFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) -#undef SET_VALID_ENTRY +#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - return validParamList; - } + return validParamList; +} - template - void DropNegativeEntriesFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } +template +void DropNegativeEntriesFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); +} 
- template - void DropNegativeEntriesFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel); +template +void DropNegativeEntriesFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel); - RCP Ain = Get< RCP >(currentLevel, "A"); + RCP Ain = Get >(currentLevel, "A"); - LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize(); + LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize(); - // create new empty Operator - Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); + // create new empty Operator + Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - size_t numLocalRows = Ain->getLocalNumRows(); - for(size_t row=0; rowgetRowMap()->getGlobalElement(row); + size_t numLocalRows = Ain->getLocalNumRows(); + for (size_t row = 0; row < numLocalRows; row++) { + GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row); - int rDofID = Teuchos::as(grid % nDofsPerNode); + int rDofID = Teuchos::as(grid % nDofsPerNode); - // extract row information from input matrix - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ain->getLocalRowView(row, indices, vals); + // extract row information from input matrix + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ain->getLocalRowView(row, indices, vals); - // just copy all values in output - Teuchos::ArrayRCP indout(indices.size(),Teuchos::ScalarTraits::zero()); - Teuchos::ArrayRCP valout(indices.size(),Teuchos::ScalarTraits::zero()); + // just copy all values in output + Teuchos::ArrayRCP indout(indices.size(), Teuchos::ScalarTraits::zero()); + Teuchos::ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - size_t nNonzeros = 0; - for(size_t i=0; i<(size_t)indices.size(); i++) { - GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id + size_t nNonzeros = 0; + for (size_t i = 0; i < 
(size_t)indices.size(); i++) { + GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id - int cDofID = Teuchos::as(gcid % nDofsPerNode); - if(rDofID == cDofID && Teuchos::ScalarTraits::magnitude(vals[i]) >= Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { - indout [nNonzeros] = gcid; - valout [nNonzeros] = vals[i]; - nNonzeros++; - } + int cDofID = Teuchos::as(gcid % nDofsPerNode); + if (rDofID == cDofID && Teuchos::ScalarTraits::magnitude(vals[i]) >= Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { + indout[nNonzeros] = gcid; + valout[nNonzeros] = vals[i]; + nNonzeros++; } - indout.resize(nNonzeros); - valout.resize(nNonzeros); - - Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } + indout.resize(nNonzeros); + valout.resize(nNonzeros); - Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size())); + } - // copy block size information - Aout->SetFixedBlockSize(nDofsPerNode); + Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); - GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + // copy block size information + Aout->SetFixedBlockSize(nDofsPerNode); - Set(currentLevel, "A", Aout); - } + GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + + Set(currentLevel, "A", Aout); +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DEF_HPP +#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp index 
23f56137596d..bd278f9410d5 100644 --- a/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp @@ -58,60 +58,59 @@ #include "MueLu_Aggregates_fwd.hpp" namespace MueLu { - /*! - @class FilteredAFactory class. - @brief Factory for building filtered matrices using filtered graphs. - */ - - template - class FilteredAFactory : public SingleLevelFactoryBase { +/*! + @class FilteredAFactory class. + @brief Factory for building filtered matrices using filtered graphs. +*/ + +template +class FilteredAFactory : public SingleLevelFactoryBase { #undef MUELU_FILTEREDAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - FilteredAFactory() { } + FilteredAFactory() {} - //! Destructor. - virtual ~FilteredAFactory() { } + //! Destructor. + virtual ~FilteredAFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! + @brief Build method. - Builds filtered matrix and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + Builds filtered matrix and returns it in currentLevel. 
+ */ + void Build(Level& currentLevel) const; - //@} - private: - void BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; - void BuildNew (const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; - void BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const; - void ExperimentalLumping(const Matrix& A, Matrix& filteredA, double rho, double rho2) const; + //@} + private: + void BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; + void BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; + void BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const; + void ExperimentalLumping(const Matrix& A, Matrix& filteredA, double rho, double rho2) const; - }; //class FilteredAFactory +}; // class FilteredAFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_FILTEREDAFACTORY_SHORT -#endif // MUELU_FILTEREDAFACTORY_DECL_HPP +#endif // MUELU_FILTEREDAFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp b/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp index 2828b3c0cb0e..702aaa763111 100644 --- a/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp @@ -62,174 +62,162 @@ // Variable to enable lots of debug output #define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING 0 - namespace MueLu { - template - void sort_and_unique(T & array) { - std::sort(array.begin(),array.end()); - std::unique(array.begin(),array.end()); - } - +template +void 
sort_and_unique(T& array) { + std::sort(array.begin(), array.end()); + std::unique(array.begin(), array.end()); +} - - template - RCP FilteredAFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP FilteredAFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - SET_VALID_ENTRY("filtered matrix: use root stencil"); - SET_VALID_ENTRY("filtered matrix: use spread lumping"); - SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); - SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); - SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - validParamList->set< RCP >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph"); - validParamList->set< RCP >("Filtering", Teuchos::null, "Generating factory for filtering boolean"); - - - // Only need these for the "use root stencil" option - validParamList->set< RCP >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo"); - return validParamList; + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); + SET_VALID_ENTRY("filtered matrix: use root stencil"); + SET_VALID_ENTRY("filtered matrix: use spread lumping"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); + SET_VALID_ENTRY("filtered matrix: 
Dirichlet threshold"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); + validParamList->set >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph"); + validParamList->set >("Filtering", Teuchos::null, "Generating factory for filtering boolean"); + + // Only need these for the "use root stencil" option + validParamList->set >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); + validParamList->set >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo"); + return validParamList; +} + +template +void FilteredAFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Filtering"); + Input(currentLevel, "Graph"); + const ParameterList& pL = GetParameterList(); + if (pL.isParameter("filtered matrix: use root stencil") && pL.get("filtered matrix: use root stencil") == true) { + Input(currentLevel, "Aggregates"); + Input(currentLevel, "UnAmalgamationInfo"); } +} - template - void FilteredAFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Filtering"); - Input(currentLevel, "Graph"); - const ParameterList& pL = GetParameterList(); - if(pL.isParameter("filtered matrix: use root stencil") && pL.get("filtered matrix: use root stencil") == true){ - Input(currentLevel, "Aggregates"); - Input(currentLevel, "UnAmalgamationInfo"); - } - } +template +void FilteredAFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Matrix filtering", currentLevel); - template - void FilteredAFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering", currentLevel); - - RCP A = Get< RCP >(currentLevel, "A"); - if (Get(currentLevel, "Filtering") == false) { - GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl; - Set(currentLevel, "A", A); - return; - } + RCP 
A = Get >(currentLevel, "A"); + if (Get(currentLevel, "Filtering") == false) { + GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl; + Set(currentLevel, "A", A); + return; + } - const ParameterList& pL = GetParameterList(); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + const ParameterList& pL = GetParameterList(); + bool lumping = pL.get("filtered matrix: use lumping"); + if (lumping) + GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; - bool use_spread_lumping = pL.get("filtered matrix: use spread lumping"); - if (use_spread_lumping && (!lumping) ) - throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping"); + bool use_spread_lumping = pL.get("filtered matrix: use spread lumping"); + if (use_spread_lumping && (!lumping)) + throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping"); - if (use_spread_lumping) { - GetOStream(Runtime0) << "using spread lumping " << std::endl; - } + if (use_spread_lumping) { + GetOStream(Runtime0) << "using spread lumping " << std::endl; + } - double DdomAllowGrowthRate = 1.1; - double DdomCap = 2.0; - if (use_spread_lumping) { - DdomAllowGrowthRate = pL.get("filtered matrix: spread lumping diag dom growth factor"); - DdomCap = pL.get("filtered matrix: spread lumping diag dom cap"); - } - bool use_root_stencil = lumping && pL.get("filtered matrix: use root stencil"); - if (use_root_stencil) - GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl; - double dirichlet_threshold = pL.get("filtered matrix: Dirichlet threshold"); - if(dirichlet_threshold >= 0.0) - GetOStream(Runtime0) << "Filtering Dirichlet threshold of "<("filtered matrix: reuse graph")) - GetOStream(Runtime0) << "Reusing graph"< G = Get< RCP >(currentLevel, "Graph"); - 
if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) - { - FILE * f = fopen("graph.dat","w"); - size_t numGRows = G->GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G->getNeighborVertices(i); - for(size_t j=0; j<(size_t)indsG.size(); j++) { - fprintf(f,"%d %d 1.0\n",(int)i,(int)indsG[j]); - } + double DdomAllowGrowthRate = 1.1; + double DdomCap = 2.0; + if (use_spread_lumping) { + DdomAllowGrowthRate = pL.get("filtered matrix: spread lumping diag dom growth factor"); + DdomCap = pL.get("filtered matrix: spread lumping diag dom cap"); + } + bool use_root_stencil = lumping && pL.get("filtered matrix: use root stencil"); + if (use_root_stencil) + GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl; + double dirichlet_threshold = pL.get("filtered matrix: Dirichlet threshold"); + if (dirichlet_threshold >= 0.0) + GetOStream(Runtime0) << "Filtering Dirichlet threshold of " << dirichlet_threshold << std::endl; + + if (use_root_stencil || pL.get("filtered matrix: reuse graph")) + GetOStream(Runtime0) << "Reusing graph" << std::endl; + else + GetOStream(Runtime0) << "Generating new graph" << std::endl; + + RCP G = Get >(currentLevel, "Graph"); + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) { + FILE* f = fopen("graph.dat", "w"); + size_t numGRows = G->GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G->getNeighborVertices(i); + for (size_t j = 0; j < (size_t)indsG.size(); j++) { + fprintf(f, "%d %d 1.0\n", (int)i, (int)indsG[j]); } - fclose(f); } + fclose(f); + } - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal Changes", true); - - RCP filteredA; - if(use_root_stencil) { - filteredA = MatrixFactory::Build(A->getCrsGraph()); - filteredA->fillComplete(fillCompleteParams); - filteredA->resumeFill(); - BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel,*filteredA, 
use_spread_lumping,DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(fillCompleteParams); - - } - else if (pL.get("filtered matrix: reuse graph")) { - filteredA = MatrixFactory::Build(A->getCrsGraph()); - filteredA->resumeFill(); - BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA); - // only lump inside BuildReuse if lumping is true and use_spread_lumping is false - // note: they use_spread_lumping cannot be true if lumping is false - - if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(fillCompleteParams); - - } else { - - filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); - BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA); - // only lump inside BuildNew if lumping is true and use_spread_lumping is false - // note: they use_spread_lumping cannot be true if lumping is false - if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); - } - - - - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) - { - Xpetra::IO::Write("filteredA.dat", *filteredA); - - //original filtered A and actual A - Xpetra::IO::Write("A.dat", *A); - RCP origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); - BuildNew(*A, *G, lumping, dirichlet_threshold,*origFilteredA); - if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap); - origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); - Xpetra::IO::Write("origFilteredA.dat", *origFilteredA); - } + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + + RCP filteredA; + if (use_root_stencil) { + filteredA = MatrixFactory::Build(A->getCrsGraph()); + filteredA->fillComplete(fillCompleteParams); + 
filteredA->resumeFill(); + BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel, *filteredA, use_spread_lumping, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(fillCompleteParams); + + } else if (pL.get("filtered matrix: reuse graph")) { + filteredA = MatrixFactory::Build(A->getCrsGraph()); + filteredA->resumeFill(); + BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA); + // only lump inside BuildReuse if lumping is true and use_spread_lumping is false + // note: they use_spread_lumping cannot be true if lumping is false + + if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(fillCompleteParams); + + } else { + filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); + BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA); + // only lump inside BuildNew if lumping is true and use_spread_lumping is false + // note: they use_spread_lumping cannot be true if lumping is false + if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); + } + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) { + Xpetra::IO::Write("filteredA.dat", *filteredA); - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + // original filtered A and actual A + Xpetra::IO::Write("A.dat", *A); + RCP origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); + BuildNew(*A, *G, lumping, dirichlet_threshold, *origFilteredA); + if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap); + origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); + Xpetra::IO::Write("origFilteredA.dat", *origFilteredA); + } - if (pL.get("filtered matrix: reuse eigenvalue")) { - // Reuse max eigenvalue from A - // It 
is unclear what eigenvalue is the best for the smoothing, but we already may have - // the D^{-1}A estimate in A, may as well use it. - // NOTE: ML does that too - filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); - } + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - Set(currentLevel, "A", filteredA); + if (pL.get("filtered matrix: reuse eigenvalue")) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we already may have + // the D^{-1}A estimate in A, may as well use it. + // NOTE: ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); } + Set(currentLevel, "A", filteredA); +} + // Epetra's API allows direct access to row array. // Tpetra's API does not, providing only ArrayView // But in most situations we are currently interested in, it is safe to assume @@ -238,801 +226,794 @@ namespace MueLu { // replaceLocalValues() call which is quite expensive due to all the searches. //#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue 10883#issuecomment-1256676340 - // Both Epetra and Tpetra matrix-matrix multiply use the following trick: - // if an entry of the left matrix is zero, it does not compute or store the - // zero value. - // - // This trick allows us to bypass constructing a new matrix. Instead, we - // make a deep copy of the original one, and fill it in with zeros, which - // are ignored during the prolongator smoothing. - template - void FilteredAFactory:: - BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = TST::zero(); - - - size_t blkSize = A.GetFixedBlockSize(); - - ArrayView inds; - ArrayView valsA; +// Both Epetra and Tpetra matrix-matrix multiply use the following trick: +// if an entry of the left matrix is zero, it does not compute or store the +// zero value. 
+// +// This trick allows us to bypass constructing a new matrix. Instead, we +// make a deep copy of the original one, and fill it in with zeros, which +// are ignored during the prolongator smoothing. +template +void FilteredAFactory:: + BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = TST::zero(); + + size_t blkSize = A.GetFixedBlockSize(); + + ArrayView inds; + ArrayView valsA; #ifdef ASSUME_DIRECT_ACCESS_TO_ROW - ArrayView vals; + ArrayView vals; #else - Array vals; + Array vals; #endif - Array filter( std::max(blkSize*G.GetImportMap()->getLocalNumElements(), - A.getColMap()->getLocalNumElements()), - 0); + Array filter(std::max(blkSize * G.GetImportMap()->getLocalNumElements(), + A.getColMap()->getLocalNumElements()), + 0); - size_t numGRows = G.GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G.getNeighborVertices(i); - for (size_t j = 0; j < as(indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 1; + size_t numGRows = G.GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(i); + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 1; - for (size_t k = 0; k < blkSize; k++) { - LO row = i*blkSize + k; + for (size_t k = 0; k < blkSize; k++) { + LO row = i * blkSize + k; - A.getLocalRowView(row, inds, valsA); + A.getLocalRowView(row, inds, valsA); - size_t nnz = inds.size(); - if (nnz == 0) - continue; + size_t nnz = inds.size(); + if (nnz == 0) + continue; #ifdef ASSUME_DIRECT_ACCESS_TO_ROW - // Transform ArrayView into ArrayView - ArrayView vals1; - filteredA.getLocalRowView(row, inds, vals1); - vals = ArrayView(const_cast(vals1.getRawPtr()), nnz); + // Transform ArrayView into ArrayView + 
ArrayView vals1; + filteredA.getLocalRowView(row, inds, vals1); + vals = ArrayView(const_cast(vals1.getRawPtr()), nnz); - memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC)); + memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz * sizeof(SC)); #else - vals = Array(valsA); + vals = Array(valsA); #endif - SC ZERO = Teuchos::ScalarTraits::zero(); - // SC ONE = Teuchos::ScalarTraits::one(); - SC A_rowsum = ZERO, F_rowsum = ZERO; - for(LO l = 0; l < (LO)inds.size(); l++) - A_rowsum += valsA[l]; + SC ZERO = Teuchos::ScalarTraits::zero(); + // SC ONE = Teuchos::ScalarTraits::one(); + SC A_rowsum = ZERO, F_rowsum = ZERO; + for (LO l = 0; l < (LO)inds.size(); l++) + A_rowsum += valsA[l]; - if (lumping == false) { - for (size_t j = 0; j < nnz; j++) - if (!filter[inds[j]]) - vals[j] = zero; + if (lumping == false) { + for (size_t j = 0; j < nnz; j++) + if (!filter[inds[j]]) + vals[j] = zero; - } else { - LO diagIndex = -1; - SC diagExtra = zero; - - for (size_t j = 0; j < nnz; j++) { - if (filter[inds[j]]) { - if (inds[j] == row) { - // Remember diagonal position - diagIndex = j; - } - continue; + } else { + LO diagIndex = -1; + SC diagExtra = zero; + + for (size_t j = 0; j < nnz; j++) { + if (filter[inds[j]]) { + if (inds[j] == row) { + // Remember diagonal position + diagIndex = j; } + continue; + } - diagExtra += vals[j]; + diagExtra += vals[j]; - vals[j] = zero; - } + vals[j] = zero; + } - // Lump dropped entries - // NOTE - // * Does it make sense to lump for elasticity? - // * Is it different for diffusion and elasticity? 
- //SC diagA = ZERO; - if (diagIndex != -1) { - //diagA = vals[diagIndex]; - vals[diagIndex] += diagExtra; - if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { - - // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); - for(LO l = 0; l < (LO)nnz; l++) - F_rowsum += vals[l]; - // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); - vals[diagIndex] = TST::one(); - } + // Lump dropped entries + // NOTE + // * Does it make sense to lump for elasticity? + // * Is it different for diffusion and elasticity? + // SC diagA = ZERO; + if (diagIndex != -1) { + // diagA = vals[diagIndex]; + vals[diagIndex] += diagExtra; + if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { + // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); + for (LO l = 0; l < (LO)nnz; l++) + F_rowsum += vals[l]; + // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); + vals[diagIndex] = TST::one(); } - } + } #ifndef ASSUME_DIRECT_ACCESS_TO_ROW - // Because we used a column map in the construction of the matrix - // we can just use insertLocalValues here instead of insertGlobalValues - filteredA.replaceLocalValues(row, inds, vals); + // Because we used a column map in the construction of the matrix + // we can just use insertLocalValues here instead of insertGlobalValues + filteredA.replaceLocalValues(row, inds, vals); #endif - } - - // Reset filtering array - for (size_t j = 0; j < as (indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 0; } - } - - template - void FilteredAFactory:: - BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = Teuchos::ScalarTraits::zero(); - - size_t blkSize = A.GetFixedBlockSize(); - ArrayView indsA; - ArrayView valsA; - Array inds; - Array vals; - - 
Array filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0); - - size_t numGRows = G.GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G.getNeighborVertices(i); - for (size_t j = 0; j < as(indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 1; - - for (size_t k = 0; k < blkSize; k++) { - LO row = i*blkSize + k; - - A.getLocalRowView(row, indsA, valsA); - - size_t nnz = indsA.size(); - if (nnz == 0) - continue; - - inds.resize(indsA.size()); - vals.resize(valsA.size()); + // Reset filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 0; + } +} + +template +void FilteredAFactory:: + BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = Teuchos::ScalarTraits::zero(); + + size_t blkSize = A.GetFixedBlockSize(); + + ArrayView indsA; + ArrayView valsA; + Array inds; + Array vals; + + Array filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0); + + size_t numGRows = G.GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(i); + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 1; + + for (size_t k = 0; k < blkSize; k++) { + LO row = i * blkSize + k; + + A.getLocalRowView(row, indsA, valsA); + + size_t nnz = indsA.size(); + if (nnz == 0) + continue; + + inds.resize(indsA.size()); + vals.resize(valsA.size()); + + size_t numInds = 0; + if (lumping == false) { + for (size_t j = 0; j < nnz; j++) + if (filter[indsA[j]]) { + inds[numInds] = indsA[j]; + vals[numInds] = valsA[j]; + numInds++; + } - size_t numInds = 0; - if (lumping == false) { - for (size_t j = 0; j < nnz; j++) - if (filter[indsA[j]]) { - inds[numInds] = 
indsA[j]; - vals[numInds] = valsA[j]; - numInds++; - } + } else { + LO diagIndex = -1; + SC diagExtra = zero; - } else { - LO diagIndex = -1; - SC diagExtra = zero; - - for (size_t j = 0; j < nnz; j++) { - if (filter[indsA[j]]) { - inds[numInds] = indsA[j]; - vals[numInds] = valsA[j]; + for (size_t j = 0; j < nnz; j++) { + if (filter[indsA[j]]) { + inds[numInds] = indsA[j]; + vals[numInds] = valsA[j]; - // Remember diagonal position - if (inds[numInds] == row) - diagIndex = numInds; + // Remember diagonal position + if (inds[numInds] == row) + diagIndex = numInds; - numInds++; + numInds++; - } else { - diagExtra += valsA[j]; - } + } else { + diagExtra += valsA[j]; } + } - // Lump dropped entries - // NOTE - // * Does it make sense to lump for elasticity? - // * Is it different for diffusion and elasticity? - if (diagIndex != -1) { - vals[diagIndex] += diagExtra; - if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { - // SC A_rowsum = ZERO, F_rowsum = ZERO; - // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); - // for(LO l = 0; l < (LO)nnz; l++) - // F_rowsum += vals[l]; - // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); - vals[diagIndex] = TST::one(); - } + // Lump dropped entries + // NOTE + // * Does it make sense to lump for elasticity? + // * Is it different for diffusion and elasticity? 
+ if (diagIndex != -1) { + vals[diagIndex] += diagExtra; + if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { + // SC A_rowsum = ZERO, F_rowsum = ZERO; + // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); + // for(LO l = 0; l < (LO)nnz; l++) + // F_rowsum += vals[l]; + // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); + vals[diagIndex] = TST::one(); } - } - inds.resize(numInds); - vals.resize(numInds); - - - - // Because we used a column map in the construction of the matrix - // we can just use insertLocalValues here instead of insertGlobalValues - filteredA.insertLocalValues(row, inds, vals); } + inds.resize(numInds); + vals.resize(numInds); - // Reset filtering array - for (size_t j = 0; j < as (indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 0; + // Because we used a column map in the construction of the matrix + // we can just use insertLocalValues here instead of insertGlobalValues + filteredA.insertLocalValues(row, inds, vals); } - } - template - void FilteredAFactory:: - BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const { - using TST = typename Teuchos::ScalarTraits; - using Teuchos::arcp_const_cast; - SC ZERO = Teuchos::ScalarTraits::zero(); - SC ONE = Teuchos::ScalarTraits::one(); - LO INVALID = Teuchos::OrdinalTraits::invalid(); - - size_t numNodes = G.GetNodeNumVertices(); - size_t blkSize = A.GetFixedBlockSize(); - size_t numRows = A.getMap()->getLocalNumElements(); - ArrayView indsA; - ArrayView valsA; - ArrayRCP rowptr; - ArrayRCP inds; - ArrayRCP vals_const; - ArrayRCP vals; - - // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have - // not yey been touched." 
If I see an entry in the primary loop that has a zero, then I assume it has been nuked by - // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal. - RCP filteredAcrs = dynamic_cast(&filteredA)->getCrsMatrix(); - filteredAcrs->getAllValues(rowptr,inds,vals_const); - vals = arcp_const_cast(vals_const); - Array vals_dropped_indicator(vals.size(),false); - - // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy. - RCP aggregates = Get< RCP > (currentLevel, "Aggregates"); - RCP amalgInfo = Get< RCP > (currentLevel, "UnAmalgamationInfo"); - LO numAggs = aggregates->GetNumAggregates(); - - // Check map nesting - RCP rowMap = A.getRowMap(); - RCP colMap = A.getColMap(); - bool goodMap = MueLu::Utilities::MapsAreNested(*rowMap, *colMap); - TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError,"FilteredAFactory: Maps are not nested"); - - // Since we're going to symmetrize this - Array diagIndex(numRows,INVALID); - Array diagExtra(numRows,ZERO); - - // Lists of nodes in each aggregate - struct { - // GH: For now, copy everything to host until we properly set this factory to run device code - // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later. 
- typename Aggregates::LO_view ptr, nodes, unaggregated; - typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h; - } nodesInAgg; - aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated); - nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr); - nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes); - nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated); - Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr); - Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes); - Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated); - Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate - - LO graphNumCols = G.GetImportMap()->getLocalNumElements(); - Array filter(graphNumCols, false); - - // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons. 
- for(LO i=0; i< (LO)nodesInAgg.unaggregated_h.extent(0); i++) { - for (LO m = 0; m < (LO)blkSize; m++) { - LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i),m); - if (row >= (LO)numRows) continue; - size_t index_start = rowptr[row]; - A.getLocalRowView(row, indsA, valsA); - for(LO k=0; k<(LO)indsA.size(); k++) { - if(row == indsA[k]) { - vals[index_start+k] = ONE; - diagIndex[row] = k; - } - else - vals[index_start+k] = ZERO; - } + // Reset filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 0; + } +} + +template +void FilteredAFactory:: + BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const { + using TST = typename Teuchos::ScalarTraits; + using Teuchos::arcp_const_cast; + SC ZERO = Teuchos::ScalarTraits::zero(); + SC ONE = Teuchos::ScalarTraits::one(); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + size_t numNodes = G.GetNodeNumVertices(); + size_t blkSize = A.GetFixedBlockSize(); + size_t numRows = A.getMap()->getLocalNumElements(); + ArrayView indsA; + ArrayView valsA; + ArrayRCP rowptr; + ArrayRCP inds; + ArrayRCP vals_const; + ArrayRCP vals; + + // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have + // not yey been touched." If I see an entry in the primary loop that has a zero, then I assume it has been nuked by + // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal. + RCP filteredAcrs = dynamic_cast(&filteredA)->getCrsMatrix(); + filteredAcrs->getAllValues(rowptr, inds, vals_const); + vals = arcp_const_cast(vals_const); + Array vals_dropped_indicator(vals.size(), false); + + // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy. 
+ RCP aggregates = Get >(currentLevel, "Aggregates"); + RCP amalgInfo = Get >(currentLevel, "UnAmalgamationInfo"); + LO numAggs = aggregates->GetNumAggregates(); + + // Check map nesting + RCP rowMap = A.getRowMap(); + RCP colMap = A.getColMap(); + bool goodMap = MueLu::Utilities::MapsAreNested(*rowMap, *colMap); + TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError, "FilteredAFactory: Maps are not nested"); + + // Since we're going to symmetrize this + Array diagIndex(numRows, INVALID); + Array diagExtra(numRows, ZERO); + + // Lists of nodes in each aggregate + struct { + // GH: For now, copy everything to host until we properly set this factory to run device code + // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later. + typename Aggregates::LO_view ptr, nodes, unaggregated; + typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h; + } nodesInAgg; + aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated); + nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr); + nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes); + nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated); + Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr); + Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes); + Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated); + Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate + + LO graphNumCols = G.GetImportMap()->getLocalNumElements(); + Array filter(graphNumCols, false); + + // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons. 
+ for (LO i = 0; i < (LO)nodesInAgg.unaggregated_h.extent(0); i++) { + for (LO m = 0; m < (LO)blkSize; m++) { + LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i), m); + if (row >= (LO)numRows) continue; + size_t index_start = rowptr[row]; + A.getLocalRowView(row, indsA, valsA); + for (LO k = 0; k < (LO)indsA.size(); k++) { + if (row == indsA[k]) { + vals[index_start + k] = ONE; + diagIndex[row] = k; + } else + vals[index_start + k] = ZERO; } - }//end nodesInAgg.unaggregated.extent(0); - - - std::vector badCount(numAggs,0); - - // Find the biggest aggregate size in *nodes* - LO maxAggSize=0; - for(LO i=0; i goodAggNeighbors(G.getLocalMaxNumRowEntries()); - std::vector badAggNeighbors(std::min(G.getLocalMaxNumRowEntries()*maxAggSize,numNodes)); - - size_t numNewDrops=0; - size_t numOldDrops=0; - size_t numFixedDiags=0; - size_t numSymDrops = 0; - - for(LO i=0; iIsRoot(nodesInAgg.nodes_h(k))) { - root_node = nodesInAgg.nodes_h(k); break; - } + } + } // end nodesInAgg.unaggregated.extent(0); + + std::vector badCount(numAggs, 0); + + // Find the biggest aggregate size in *nodes* + LO maxAggSize = 0; + for (LO i = 0; i < numAggs; i++) + maxAggSize = std::max(maxAggSize, nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i)); + + // Loop over all the aggregates + std::vector goodAggNeighbors(G.getLocalMaxNumRowEntries()); + std::vector badAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes)); + + size_t numNewDrops = 0; + size_t numOldDrops = 0; + size_t numFixedDiags = 0; + size_t numSymDrops = 0; + + for (LO i = 0; i < numAggs; i++) { + LO numNodesInAggregate = nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i); + if (numNodesInAggregate == 0) continue; + + // Find the root *node* + LO root_node = INVALID; + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + if (aggregates->IsRoot(nodesInAgg.nodes_h(k))) { + root_node = nodesInAgg.nodes_h(k); + break; } + } - TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID, - 
Exceptions::RuntimeError,"MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node"); + TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID, + Exceptions::RuntimeError, "MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node"); - // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G) - ArrayView goodNodeNeighbors = G.getNeighborVertices(root_node); + // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G) + ArrayView goodNodeNeighbors = G.getNeighborVertices(root_node); - // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G) - goodAggNeighbors.resize(0); - for(LO k=0; k<(LO) goodNodeNeighbors.size(); k++) { - goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]); - } - sort_and_unique(goodAggNeighbors); - - // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the - // root node in the original matrix A, which are not goodNodeNeighbors). 
Since we - // don't have an amalgamated version of the original matrix, we use the matrix directly - badAggNeighbors.resize(0); - for(LO j = 0; j < (LO)blkSize; j++) { - LO row = amalgInfo->ComputeLocalDOF(root_node,j); - if (row >= (LO)numRows) continue; - A.getLocalRowView(row, indsA, valsA); - for(LO k=0; k<(LO)indsA.size(); k++) { - if ( (indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) { - LO node = amalgInfo->ComputeLocalNode(indsA[k]); - LO agg = vertex2AggId[node]; - if(!std::binary_search(goodAggNeighbors.begin(),goodAggNeighbors.end(),agg)) - badAggNeighbors.push_back(agg); - } + // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G) + goodAggNeighbors.resize(0); + for (LO k = 0; k < (LO)goodNodeNeighbors.size(); k++) { + goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]); + } + sort_and_unique(goodAggNeighbors); + + // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the + // root node in the original matrix A, which are not goodNodeNeighbors). Since we + // don't have an amalgamated version of the original matrix, we use the matrix directly + badAggNeighbors.resize(0); + for (LO j = 0; j < (LO)blkSize; j++) { + LO row = amalgInfo->ComputeLocalDOF(root_node, j); + if (row >= (LO)numRows) continue; + A.getLocalRowView(row, indsA, valsA); + for (LO k = 0; k < (LO)indsA.size(); k++) { + if ((indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) { + LO node = amalgInfo->ComputeLocalNode(indsA[k]); + LO agg = vertex2AggId[node]; + if (!std::binary_search(goodAggNeighbors.begin(), goodAggNeighbors.end(), agg)) + badAggNeighbors.push_back(agg); } } - sort_and_unique(badAggNeighbors); + } + sort_and_unique(badAggNeighbors); - // Go through the filtered graph and count the number of connections to the badAggNeighbors - // if there are 2 or more of these connections, remove them from the bad list. 
+ // Go through the filtered graph and count the number of connections to the badAggNeighbors + // if there are 2 or more of these connections, remove them from the bad list. - for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) { - ArrayView nodeNeighbors = G.getNeighborVertices(k); - for (LO kk=0; kk < nodeNeighbors.size(); kk++) { - if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) - (badCount[vertex2AggId[nodeNeighbors[kk]]])++; - } - } - std::vector reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries()*maxAggSize,numNodes)); - reallyBadAggNeighbors.resize(0); - for (LO k=0; k < (LO) badAggNeighbors.size(); k++) { - if (badCount[badAggNeighbors[k]] <= 1 ) reallyBadAggNeighbors.push_back(badAggNeighbors[k]); + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + ArrayView nodeNeighbors = G.getNeighborVertices(k); + for (LO kk = 0; kk < nodeNeighbors.size(); kk++) { + if ((vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) + (badCount[vertex2AggId[nodeNeighbors[kk]]])++; } - for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) { - ArrayView nodeNeighbors = G.getNeighborVertices(k); - for (LO kk=0; kk < nodeNeighbors.size(); kk++) { - if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) - badCount[vertex2AggId[nodeNeighbors[kk]]] = 0; - } + } + std::vector reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes)); + reallyBadAggNeighbors.resize(0); + for (LO k = 0; k < (LO)badAggNeighbors.size(); k++) { + if (badCount[badAggNeighbors[k]] <= 1) reallyBadAggNeighbors.push_back(badAggNeighbors[k]); + } + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + ArrayView nodeNeighbors = G.getNeighborVertices(k); + for (LO kk = 0; kk < nodeNeighbors.size(); kk++) { + if ((vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) + 
badCount[vertex2AggId[nodeNeighbors[kk]]] = 0; } + } - // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate. - // We remove the INVALID marker when we do this so we don't wind up doubling this up later - for(LO b=0; b<(LO)reallyBadAggNeighbors.size(); b++) { - LO bad_agg = reallyBadAggNeighbors[b]; - for (LO k=nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg+1); k++) { - LO bad_node = nodesInAgg.nodes_h(k); - for(LO j = 0; j < (LO)blkSize; j++) { - LO bad_row = amalgInfo->ComputeLocalDOF(bad_node,j); - if (bad_row >= (LO)numRows) continue; - size_t index_start = rowptr[bad_row]; - A.getLocalRowView(bad_row, indsA, valsA); - for(LO l = 0; l < (LO)indsA.size(); l++) { - if(indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start+l] == false) { - vals_dropped_indicator[index_start + l] = true; - vals[index_start + l] = ZERO; - diagExtra[bad_row] += valsA[l]; - numSymDrops++; - } + // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate. 
+ // We remove the INVALID marker when we do this so we don't wind up doubling this up later + for (LO b = 0; b < (LO)reallyBadAggNeighbors.size(); b++) { + LO bad_agg = reallyBadAggNeighbors[b]; + for (LO k = nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg + 1); k++) { + LO bad_node = nodesInAgg.nodes_h(k); + for (LO j = 0; j < (LO)blkSize; j++) { + LO bad_row = amalgInfo->ComputeLocalDOF(bad_node, j); + if (bad_row >= (LO)numRows) continue; + size_t index_start = rowptr[bad_row]; + A.getLocalRowView(bad_row, indsA, valsA); + for (LO l = 0; l < (LO)indsA.size(); l++) { + if (indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start + l] == false) { + vals_dropped_indicator[index_start + l] = true; + vals[index_start + l] = ZERO; + diagExtra[bad_row] += valsA[l]; + numSymDrops++; } } } } + } - // Now lets fill the rows in this aggregate and figure out the diagonal lumping - // We loop over each node in the aggregate and then over the neighbors of that node - - for(LO k=nodesInAgg.ptr_h(i); k indsG = G.getNeighborVertices(row_node); - for (size_t j = 0; j < as(indsG.size()); j++) - filter[indsG[j]]=true; - - for (LO m = 0; m < (LO)blkSize; m++) { - LO row = amalgInfo->ComputeLocalDOF(row_node,m); - if (row >= (LO)numRows) continue; - size_t index_start = rowptr[row]; - A.getLocalRowView(row, indsA, valsA); - - for(LO l = 0; l < (LO)indsA.size(); l++) { - int col_node = amalgInfo->ComputeLocalNode(indsA[l]); - bool is_good = filter[col_node]; - if (indsA[l] == row) { - diagIndex[row] = l; - vals[index_start + l] = valsA[l]; - continue; - } + // Now lets fill the rows in this aggregate and figure out the diagonal lumping + // We loop over each node in the aggregate and then over the neighbors of that node - // If we've already dropped this guy (from symmetry above), then continue onward - if(vals_dropped_indicator[index_start +l] == true) { - if(is_good) numOldDrops++; - else numNewDrops++; - 
continue; - } + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + LO row_node = nodesInAgg.nodes_h(k); + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(row_node); + for (size_t j = 0; j < as(indsG.size()); j++) + filter[indsG[j]] = true; - // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so - // we won'd do secondary dropping on off-processor neighbors - if(is_good && indsA[l] < (LO)numRows) { - int agg = vertex2AggId[col_node]; - if(std::binary_search(reallyBadAggNeighbors.begin(),reallyBadAggNeighbors.end(),agg)) - is_good = false; - } + for (LO m = 0; m < (LO)blkSize; m++) { + LO row = amalgInfo->ComputeLocalDOF(row_node, m); + if (row >= (LO)numRows) continue; + size_t index_start = rowptr[row]; + A.getLocalRowView(row, indsA, valsA); - if(is_good){ - vals[index_start+l] = valsA[l]; - } - else { - if(!filter[col_node]) numOldDrops++; - else numNewDrops++; - diagExtra[row] += valsA[l]; - vals[index_start+l]=ZERO; - vals_dropped_indicator[index_start+l]=true; - } - } //end for l "indsA.size()" loop - - }//end m "blkSize" loop - - // Clear filtering array - for (size_t j = 0; j < as(indsG.size()); j++) - filter[indsG[j]]=false; - - }// end k loop over number of nodes in this agg - }//end i loop over numAggs - - if (!use_spread_lumping) { - // Now do the diagonal modifications in one, final pass - for(LO row=0; row <(LO)numRows; row++) { - if (diagIndex[row] != INVALID) { - size_t index_start = rowptr[row]; - size_t diagIndexInMatrix = index_start + diagIndex[row]; - // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] ); - vals[diagIndexInMatrix] += diagExtra[row]; - SC A_rowsum=ZERO, A_absrowsum = ZERO, F_rowsum = ZERO; - - - if( (dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) { - - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING>0) { - A.getLocalRowView(row, indsA, valsA); - // SC diagA = valsA[diagIndex[row]]; - // 
printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size()); - - for(LO l = 0; l < (LO)indsA.size(); l++) { - A_rowsum += valsA[l]; - A_absrowsum+=std::abs(valsA[l]); - } - for(LO l = 0; l < (LO)indsA.size(); l++) - F_rowsum += vals[index_start+l]; - // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum); - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 1){ - // printf(" Avals ="); - // for(LO l = 0; l < (LO)indsA.size(); l++) - // printf("%d(%8.2e)[%d] ",(LO)indsA[l],valsA[l],(LO)l); - // printf("\n"); - // printf(" Fvals ="); - // for(LO l = 0; l < (LO)indsA.size(); l++) - // if(vals[index_start+l] != ZERO) - // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l); - } - } - // Don't know what to do, so blitz the row and dump a one on the diagonal - for(size_t l=rowptr[row]; lComputeLocalNode(indsA[l]); + bool is_good = filter[col_node]; + if (indsA[l] == row) { + diagIndex[row] = l; + vals[index_start + l] = valsA[l]; + continue; } - } - else { - GetOStream(Runtime0)<<"WARNING: Row "<getComm(), numNewDrops, g_newDrops); - MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops); - MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags); - GetOStream(Runtime0)<< "Filtering out "< - void FilteredAFactory:: - ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = TST::zero(); - SC one = TST::one(); - - ArrayView inds; - ArrayView vals; - ArrayView finds; - ArrayView fvals; - - SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum; - SC diag, gamma, alpha; - LO NumPosKept, NumNegKept; - - SC noLumpDdom; - SC numer,denom; - SC PosFilteredSum, NegFilteredSum; - SC Target; - - SC rho = as(irho); - SC rho2 = as(irho2); - - for (LO row = 0; row < (LO) A.getRowMap()->getLocalNumElements(); row++) { - noLumpDdom 
= as(10000.0); // only used if diagonal is zero - // the whole idea sort of breaks down - // when the diagonal is zero. In particular, - // the old diag dominance ratio is infinity - // ... so what do we want for the new ddom - // ratio. Do we want to allow the diagonal - // to go negative, just to have a better ddom - // ratio? This current choice essentially - // changes 'Target' to a large number - // meaning that we will allow the new - // ddom number to be fairly large (because - // the old one was infinity) - - ArrayView tvals; - A.getLocalRowView(row, inds, vals); - size_t nnz = inds.size(); - if (nnz == 0) continue; - filteredA.getLocalRowView(row, finds, tvals);//assume 2 getLocalRowView()s - // have things in same order - fvals = ArrayView(const_cast(tvals.getRawPtr()), nnz); - - LO diagIndex = -1, fdiagIndex = -1; - - PosOffSum=zero; NegOffSum=zero; PosOffDropSum=zero; NegOffDropSum=zero; - diag=zero; NumPosKept=0; NumNegKept=0; - - // first record diagonal, offdiagonal sums and off diag dropped sums - for (size_t j = 0; j < nnz; j++) { - if (inds[j] == row) { - diagIndex = j; - diag = vals[j]; + // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so + // we won'd do secondary dropping on off-processor neighbors + if (is_good && indsA[l] < (LO)numRows) { + int agg = vertex2AggId[col_node]; + if (std::binary_search(reallyBadAggNeighbors.begin(), reallyBadAggNeighbors.end(), agg)) + is_good = false; } - else { // offdiagonal - if (TST::real(vals[j]) > TST::real(zero) ) PosOffSum += vals[j]; - else NegOffSum += vals[j]; + + if (is_good) { + vals[index_start + l] = valsA[l]; + } else { + if (!filter[col_node]) + numOldDrops++; + else + numNewDrops++; + diagExtra[row] += valsA[l]; + vals[index_start + l] = ZERO; + vals_dropped_indicator[index_start + l] = true; } - } - PosOffDropSum = PosOffSum; - NegOffDropSum = NegOffSum; - NumPosKept = 0; - NumNegKept = 0; - LO j = 0; - for (size_t jj = 0; jj < (size_t) finds.size(); jj++) { - while( inds[j] 
!= finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if (finds[jj] == row) fdiagIndex = jj; - else { - if (TST::real(vals[j]) > TST::real(zero) ) { - PosOffDropSum -= fvals[jj]; - if (TST::real(fvals[jj]) != TST::real(zero) ) NumPosKept++; + } // end for l "indsA.size()" loop + + } // end m "blkSize" loop + + // Clear filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + filter[indsG[j]] = false; + + } // end k loop over number of nodes in this agg + } // end i loop over numAggs + + if (!use_spread_lumping) { + // Now do the diagonal modifications in one, final pass + for (LO row = 0; row < (LO)numRows; row++) { + if (diagIndex[row] != INVALID) { + size_t index_start = rowptr[row]; + size_t diagIndexInMatrix = index_start + diagIndex[row]; + // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] ); + vals[diagIndexInMatrix] += diagExtra[row]; + SC A_rowsum = ZERO, A_absrowsum = ZERO, F_rowsum = ZERO; + + if ((dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) { + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 0) { + A.getLocalRowView(row, indsA, valsA); + // SC diagA = valsA[diagIndex[row]]; + // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size()); + + for (LO l = 0; l < (LO)indsA.size(); l++) { + A_rowsum += valsA[l]; + A_absrowsum += std::abs(valsA[l]); } - else { - NegOffDropSum -= fvals[jj]; - if (TST::real(fvals[jj]) != TST::real(zero) ) NumNegKept++; + for (LO l = 0; l < (LO)indsA.size(); l++) + F_rowsum += vals[index_start + l]; + // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum); + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 1) { + // printf(" Avals ="); + // for(LO l = 0; l < (LO)indsA.size(); l++) + // printf("%d(%8.2e)[%d] 
",(LO)indsA[l],valsA[l],(LO)l); + // printf("\n"); + // printf(" Fvals ="); + // for(LO l = 0; l < (LO)indsA.size(); l++) + // if(vals[index_start+l] != ZERO) + // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l); } } + // Don't know what to do, so blitz the row and dump a one on the diagonal + for (size_t l = rowptr[row]; l < rowptr[row + 1]; l++) { + vals[l] = ZERO; + } + vals[diagIndexInMatrix] = TST::one(); + numFixedDiags++; } + } else { + GetOStream(Runtime0) << "WARNING: Row " << row << " has no diagonal " << std::endl; + } + } /*end row "numRows" loop"*/ + } - // measure of diagonal dominance if no lumping is done. - if (TST::magnitude(diag) != TST::magnitude(zero) ) - noLumpDdom = (PosOffSum - NegOffSum)/diag; - - // Target is an acceptable diagonal dominance ratio - // which should really be larger than 1 - - Target = rho*noLumpDdom; - if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2; + // Copy all the goop out + for (LO row = 0; row < (LO)numRows; row++) { + filteredA.replaceLocalValues(row, inds(rowptr[row], rowptr[row + 1] - rowptr[row]), vals(rowptr[row], rowptr[row + 1] - rowptr[row])); + } + if (use_spread_lumping) ExperimentalLumping(A, filteredA, DdomAllowGrowthRate, DdomCap); - PosFilteredSum = PosOffSum - PosOffDropSum; - NegFilteredSum = NegOffSum - NegOffDropSum; - // Note: PosNotFilterdSum is not equal to the sum of the - // positive entries after lumping. It just reflects the - // pos offdiag sum of the filtered matrix before lumping - // and does not account for negative dropped terms lumped - // to the positive kept terms. + size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags = 0; - // dropped positive offdiags always go to the diagonal as these - // always improve diagonal dominance. 
+ MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops); + MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops); + MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags); + GetOStream(Runtime0) << "Filtering out " << g_newDrops << " edges, in addition to the " << g_oldDrops << " edges dropped earlier" << std::endl; + GetOStream(Runtime0) << "Fixing " << g_fixedDiags << " zero diagonal values" << std::endl; +} - diag += PosOffDropSum; +// fancy lumping trying to not just move everything to the diagonal but to also consider moving +// some lumping to the kept off-diagonals. We basically aim to not increase the diagonal +// dominance in a row. In particular, the goal is that row i satisfies +// +// lumpedDiagDomMeasure_i <= rho2 +// or +// lumpedDiagDomMeasure <= rho*unlumpedDiagDomMeasure +// +// NOTE: THIS CODE assumes direct access to a row. See comments above concerning +// ASSUME_DIRECT_ACCESS_TO_ROW +// +template +void FilteredAFactory:: + ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = TST::zero(); + SC one = TST::one(); + + ArrayView inds; + ArrayView vals; + ArrayView finds; + ArrayView fvals; + + SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum; + SC diag, gamma, alpha; + LO NumPosKept, NumNegKept; + + SC noLumpDdom; + SC numer, denom; + SC PosFilteredSum, NegFilteredSum; + SC Target; + + SC rho = as(irho); + SC rho2 = as(irho2); + + for (LO row = 0; row < (LO)A.getRowMap()->getLocalNumElements(); row++) { + noLumpDdom = as(10000.0); // only used if diagonal is zero + // the whole idea sort of breaks down + // when the diagonal is zero. In particular, + // the old diag dominance ratio is infinity + // ... so what do we want for the new ddom + // ratio. Do we want to allow the diagonal + // to go negative, just to have a better ddom + // ratio? 
This current choice essentially + // changes 'Target' to a large number + // meaning that we will allow the new + // ddom number to be fairly large (because + // the old one was infinity) + + ArrayView tvals; + A.getLocalRowView(row, inds, vals); + size_t nnz = inds.size(); + if (nnz == 0) continue; + filteredA.getLocalRowView(row, finds, tvals); // assume 2 getLocalRowView()s + // have things in same order + fvals = ArrayView(const_cast(tvals.getRawPtr()), nnz); + + LO diagIndex = -1, fdiagIndex = -1; + + PosOffSum = zero; + NegOffSum = zero; + PosOffDropSum = zero; + NegOffDropSum = zero; + diag = zero; + NumPosKept = 0; + NumNegKept = 0; + + // first record diagonal, offdiagonal sums and off diag dropped sums + for (size_t j = 0; j < nnz; j++) { + if (inds[j] == row) { + diagIndex = j; + diag = vals[j]; + } else { // offdiagonal + if (TST::real(vals[j]) > TST::real(zero)) + PosOffSum += vals[j]; + else + NegOffSum += vals[j]; + } + } + PosOffDropSum = PosOffSum; + NegOffDropSum = NegOffSum; + NumPosKept = 0; + NumNegKept = 0; + LO j = 0; + for (size_t jj = 0; jj < (size_t)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if (finds[jj] == row) + fdiagIndex = jj; + else { + if (TST::real(vals[j]) > TST::real(zero)) { + PosOffDropSum -= fvals[jj]; + if (TST::real(fvals[jj]) != TST::real(zero)) NumPosKept++; + } else { + NegOffDropSum -= fvals[jj]; + if (TST::real(fvals[jj]) != TST::real(zero)) NumNegKept++; + } + } + } - // now lets work on lumping dropped negative offdiags - gamma = -NegOffDropSum - PosFilteredSum; + // measure of diagonal dominance if no lumping is done. 
+ if (TST::magnitude(diag) != TST::magnitude(zero)) + noLumpDdom = (PosOffSum - NegOffSum) / diag; + + // Target is an acceptable diagonal dominance ratio + // which should really be larger than 1 + + Target = rho * noLumpDdom; + if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2; + + PosFilteredSum = PosOffSum - PosOffDropSum; + NegFilteredSum = NegOffSum - NegOffDropSum; + // Note: PosNotFilterdSum is not equal to the sum of the + // positive entries after lumping. It just reflects the + // pos offdiag sum of the filtered matrix before lumping + // and does not account for negative dropped terms lumped + // to the positive kept terms. + + // dropped positive offdiags always go to the diagonal as these + // always improve diagonal dominance. + + diag += PosOffDropSum; + + // now lets work on lumping dropped negative offdiags + gamma = -NegOffDropSum - PosFilteredSum; + + if (TST::real(gamma) < TST::real(zero)) { + // the total amount of negative dropping is less than PosFilteredSum, + // so we can distribute this dropping to pos offdiags. After lumping + // the sum of the pos offdiags is just -gamma so we just assign pos + // offdiags proportional to vals[j]/PosFilteredSum + // Note: in this case the diagonal is not changed as all lumping + // occurs to the pos offdiags + + if (fdiagIndex != -1) fvals[fdiagIndex] = diag; + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((j != diagIndex) && (TST::real(vals[j]) > TST::real(zero)) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero))) + fvals[jj] = -gamma * (vals[j] / PosFilteredSum); + } + } else { + // So there are more negative values that need lumping than kept + // positive offdiags. Meaning there is enough negative lumping to + // completely clear out all pos offdiags. If we lump all negs + // to pos off diags, we'd actually change them to negative. 
We + // only do this if we are desperate. Otherwise, we'll clear out + // all the positive kept offdiags and try to lump the rest + // somewhere else. We defer the clearing of pos off diags + // to see first if we are going to be desperate. + + bool flipPosOffDiagsToNeg = false; + + // Even if we lumped by zeroing positive offdiags, we are still + // going to have more lumping to distribute to either + // 1) the diagonal + // 2) the kept negative offdiags + // 3) the kept positive offdiags (desperate) + + // Let's first considering lumping the remaining neg offdiag stuff + // to the diagonal ... if this does not increase the diagonal + // dominance ratio too much (given by rho). + + if ((TST::real(diag) > TST::real(gamma)) && + (TST::real((-NegFilteredSum) / (diag - gamma)) <= TST::real(Target))) { + // 1st if term above insures that resulting diagonal (=diag-gamma) + // is positive. . The left side of 2nd term is the diagonal dominance + // if we lump the remaining stuff (gamma) to the diagonal. Recall, + // that now there are no positive off-diags so the sum(abs(offdiags)) + // is just the negative of NegFilteredSum + + if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma; + } else if (NumNegKept > 0) { + // need to do some lumping to neg offdiags to avoid a large + // increase in diagonal dominance. We first compute alpha + // which measures how much gamma should go to the + // negative offdiags. The rest will go to the diagonal + + numer = -NegFilteredSum - Target * (diag - gamma); + denom = gamma * (Target - TST::one()); + + // make sure that alpha is between 0 and 1 ... and that it doesn't + // result in a sign flip + // Note: when alpha is set to 1, then the diagonal is not modified + // and the negative offdiags just get shifted from those + // removed and those kept, meaning that the digaonal dominance + // should be the same as before + // + // can alpha be negative? It looks like denom should always + // be positive. 
The 'if' statement above + // Normally, diag-gamma should also be positive (but if it + // is negative then numer is guaranteed to be positve). + // look at the 'if' above, + // if (( TST::real(diag) > TST::real(gamma)) && + // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { + // + // Should guarantee that numer is positive. This is obvious when + // the second condition is false. When it is the first condition that + // is false, it follows that the two indiviudal terms in the numer + // formula must be positive. + + if (TST::magnitude(denom) < TST::magnitude(numer)) + alpha = TST::one(); + else + alpha = numer / denom; + if (TST::real(alpha) < TST::real(zero)) alpha = zero; + if (TST::real(diag) < TST::real((one - alpha) * gamma)) alpha = TST::one(); + + // first change the diagonal + + if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one - alpha) * gamma; + + // after lumping the sum of neg offdiags will be NegFilteredSum + // + alpha*gamma. That is the remaining negative entries altered + // by the percent (=alpha) of stuff (=gamma) that needs to be + // lumped after taking into account lumping to pos offdiags + + // Do this by assigning a fraction of NegFilteredSum+alpha*gamma + // proportional to vals[j]/NegFilteredSum + + SC temp = (NegFilteredSum + alpha * gamma) / NegFilteredSum; + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((jj != fdiagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) && + (TST::real(vals[j]) < TST::real(zero))) + fvals[jj] = temp * vals[j]; + } + } else { // desperate case + // So we don't have any kept negative offdiags ... - if (TST::real(gamma) < TST::real(zero) ) { - // the total amount of negative dropping is less than PosFilteredSum, - // so we can distribute this dropping to pos offdiags. 
After lumping - // the sum of the pos offdiags is just -gamma so we just assign pos - // offdiags proportional to vals[j]/PosFilteredSum - // Note: in this case the diagonal is not changed as all lumping - // occurs to the pos offdiags + if (NumPosKept > 0) { + // luckily we can push this stuff to the pos offdiags + // which now makes them negative + flipPosOffDiagsToNeg = true; - if (fdiagIndex != -1) fvals[fdiagIndex] = diag; j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ((j != diagIndex)&&(TST::real(vals[j]) > TST::real(zero) ) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero))) - fvals[jj] = -gamma*(vals[j]/PosFilteredSum); - + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((j != diagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) && + (TST::real(vals[j]) > TST::real(zero))) + fvals[jj] = -gamma / ((SC)NumPosKept); } } - else { - // So there are more negative values that need lumping than kept - // positive offdiags. Meaning there is enough negative lumping to - // completely clear out all pos offdiags. If we lump all negs - // to pos off diags, we'd actually change them to negative. We - // only do this if we are desperate. Otherwise, we'll clear out - // all the positive kept offdiags and try to lump the rest - // somewhere else. We defer the clearing of pos off diags - // to see first if we are going to be desperate. 
- - bool flipPosOffDiagsToNeg = false; - - // Even if we lumped by zeroing positive offdiags, we are still - // going to have more lumping to distribute to either - // 1) the diagonal - // 2) the kept negative offdiags - // 3) the kept positive offdiags (desperate) - - // Let's first considering lumping the remaining neg offdiag stuff - // to the diagonal ... if this does not increase the diagonal - // dominance ratio too much (given by rho). - - if (( TST::real(diag) > TST::real(gamma)) && - ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { - // 1st if term above insures that resulting diagonal (=diag-gamma) - // is positive. . The left side of 2nd term is the diagonal dominance - // if we lump the remaining stuff (gamma) to the diagonal. Recall, - // that now there are no positive off-diags so the sum(abs(offdiags)) - // is just the negative of NegFilteredSum - - if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma; - } - else if (NumNegKept > 0) { - // need to do some lumping to neg offdiags to avoid a large - // increase in diagonal dominance. We first compute alpha - // which measures how much gamma should go to the - // negative offdiags. The rest will go to the diagonal - - numer = -NegFilteredSum - Target*(diag-gamma); - denom = gamma*(Target - TST::one()); - - // make sure that alpha is between 0 and 1 ... and that it doesn't - // result in a sign flip - // Note: when alpha is set to 1, then the diagonal is not modified - // and the negative offdiags just get shifted from those - // removed and those kept, meaning that the digaonal dominance - // should be the same as before - // - // can alpha be negative? It looks like denom should always - // be positive. The 'if' statement above - // Normally, diag-gamma should also be positive (but if it - // is negative then numer is guaranteed to be positve). 
- // look at the 'if' above, - // if (( TST::real(diag) > TST::real(gamma)) && - // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { - // - // Should guarantee that numer is positive. This is obvious when - // the second condition is false. When it is the first condition that - // is false, it follows that the two indiviudal terms in the numer - // formula must be positive. - - if ( TST::magnitude(denom) < TST::magnitude(numer) ) alpha = TST::one(); - else alpha = numer/denom; - if ( TST::real(alpha) < TST::real(zero)) alpha = zero; - if ( TST::real(diag) < TST::real((one-alpha)*gamma) ) alpha = TST::one(); - - // first change the diagonal - - if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one-alpha)*gamma; - - // after lumping the sum of neg offdiags will be NegFilteredSum - // + alpha*gamma. That is the remaining negative entries altered - // by the percent (=alpha) of stuff (=gamma) that needs to be - // lumped after taking into account lumping to pos offdiags - - // Do this by assigning a fraction of NegFilteredSum+alpha*gamma - // proportional to vals[j]/NegFilteredSum - - SC temp = (NegFilteredSum+alpha*gamma)/NegFilteredSum; - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ( (jj != fdiagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) && - ( TST::real(vals[j]) < TST::real(zero) ) ) - fvals[jj] = temp*vals[j]; - } - } - else { // desperate case - // So we don't have any kept negative offdiags ... - - if (NumPosKept > 0) { - // luckily we can push this stuff to the pos offdiags - // which now makes them negative - flipPosOffDiagsToNeg = true; - - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... 
but perhaps has some entries missing - if ( (j != diagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) && - (TST::real(vals[j]) > TST::real(zero) )) - fvals[jj] = -gamma/( (SC) NumPosKept); - } - } - // else abandon rowsum preservation and do nothing - - } - if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out - // all pos terms including some - // not originally filtered - // but zeroed due to lumping - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ((jj != fdiagIndex)&& (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero; - } - } - } // positive gamma else - - } //loop over all rows - } + // else abandon rowsum preservation and do nothing + } + if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out + // all pos terms including some + // not originally filtered + // but zeroed due to lumping + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... 
but perhaps has some entries missing + if ((jj != fdiagIndex) && (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero; + } + } + } // positive gamma else + } // loop over all rows +} -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_FILTEREDAFACTORY_DEF_HPP +#endif // MUELU_FILTEREDAFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp index 32b5389b0e1d..a24afb8724fe 100644 --- a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp @@ -47,7 +47,6 @@ #ifndef PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DECL_HPP_ #define PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DECL_HPP_ - #include #include "MueLu_ConfigDefs.hpp" @@ -62,63 +61,61 @@ #include "MueLu_Graph_fwd.hpp" namespace MueLuTests { - template - class FineLevelInputDataFactoryTester; +template +class FineLevelInputDataFactoryTester; } namespace MueLu { - /*! - @class FineLevelInputData class. - @brief Factory for piping in input data from the finest level into the MueLu data dependency system - */ - - template - class FineLevelInputDataFactory : public SingleLevelFactoryBase { - friend class MueLuTests::FineLevelInputDataFactoryTester; +/*! + @class FineLevelInputData class. + @brief Factory for piping in input data from the finest level into the MueLu data dependency system +*/ + +template +class FineLevelInputDataFactory : public SingleLevelFactoryBase { + friend class MueLuTests::FineLevelInputDataFactoryTester; #undef MUELU_FINELEVELINPUTDATAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - FineLevelInputDataFactory() { } + FineLevelInputDataFactory() {} - //! Destructor. - virtual ~FineLevelInputDataFactory() { } + //! Destructor. 
+ virtual ~FineLevelInputDataFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - */ - void Build(Level& currentLevel) const; - - //@} - private: + /*! + @brief Build method. + */ + void Build(Level& currentLevel) const; - void test() const { std::cout << "TEST" << std::endl; } + //@} + private: + void test() const { std::cout << "TEST" << std::endl; } - }; //class FineLevelInputDataFactory +}; // class FineLevelInputDataFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_FINELEVELINPUTDATAFACTORY_SHORT diff --git a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp index a54c59a6bf1f..590b66f974ff 100644 --- a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp @@ -55,131 +55,120 @@ namespace MueLu { - template - RCP FineLevelInputDataFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP FineLevelInputDataFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // Variable name (e.g. A or P or Coordinates) - validParamList->set< std::string >("Variable", std::string("A"), "Variable name on all coarse levels (except the finest level)."); + // Variable name (e.g. 
A or P or Coordinates) + validParamList->set("Variable", std::string("A"), "Variable name on all coarse levels (except the finest level)."); - // Names of generating factories (on finest level and coarse levels) - validParamList->set< RCP >("Fine level factory", Teuchos::null, "Generating factory of the fine level variable"); - validParamList->set< RCP >("Coarse level factory", Teuchos::null, "Generating factory for data on all coarse levels (except the finest)"); + // Names of generating factories (on finest level and coarse levels) + validParamList->set >("Fine level factory", Teuchos::null, "Generating factory of the fine level variable"); + validParamList->set >("Coarse level factory", Teuchos::null, "Generating factory for data on all coarse levels (except the finest)"); - // Type of variable (see source code for a complete list of all available types) - validParamList->set ("Variable type", std::string("Matrix"), "Type of variable"); + // Type of variable (see source code for a complete list of all available types) + validParamList->set("Variable type", std::string("Matrix"), "Type of variable"); - return validParamList; - } - - template - void FineLevelInputDataFactory::DeclareInput(Level& currentLevel) const { - - const ParameterList & pL = GetParameterList(); - - std::string variableName = ""; - if(pL.isParameter("Variable")) - variableName = pL.get("Variable"); + return validParamList; +} - std::string factoryName = "NoFactory"; - if (currentLevel.GetLevelID() == 0) { - factoryName = "Fine level factory"; - } else { - factoryName = "Coarse level factory"; - } +template +void FineLevelInputDataFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); - TEUCHOS_TEST_FOR_EXCEPTION(variableName == "", MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: no variable name provided. 
Please set \'Variable\' parameter in your input deck."); + std::string variableName = ""; + if (pL.isParameter("Variable")) + variableName = pL.get("Variable"); - // data must be specified in factory! (not in factory manager) - RCP fact = GetFactory(factoryName); - currentLevel.DeclareInput(variableName, fact.get(), this); + std::string factoryName = "NoFactory"; + if (currentLevel.GetLevelID() == 0) { + factoryName = "Fine level factory"; + } else { + factoryName = "Coarse level factory"; } - template - void FineLevelInputDataFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "InputUserData", currentLevel); + TEUCHOS_TEST_FOR_EXCEPTION(variableName == "", MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: no variable name provided. Please set \'Variable\' parameter in your input deck."); - const ParameterList& pL = GetParameterList(); + // data must be specified in factory! (not in factory manager) + RCP fact = GetFactory(factoryName); + currentLevel.DeclareInput(variableName, fact.get(), this); +} - std::string variableName = ""; - if (pL.isParameter("Variable")) - variableName = pL.get("Variable"); +template +void FineLevelInputDataFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "InputUserData", currentLevel); - std::string variableType = ""; - if(pL.isParameter("Variable type")) - variableType = pL.get("Variable type"); + const ParameterList& pL = GetParameterList(); - std::string factoryName = "NoFactory"; - if (currentLevel.GetLevelID() == 0) { - factoryName = "Fine level factory"; - } else { - factoryName = "Coarse level factory"; - } - RCP fact = GetFactory(factoryName); + std::string variableName = ""; + if (pL.isParameter("Variable")) + variableName = pL.get("Variable"); - GetOStream(Debug) << "Use " << variableName << " of type " << variableType << " from " << factoryName << "(" << fact.get() << ")" << std::endl; + std::string variableType = ""; + if (pL.isParameter("Variable type")) + variableType = 
pL.get("Variable type"); - // check data type - //std::string strType = currentLevel.GetTypeName(variableName, fact.get()); - if (variableType == "int") { - int data = currentLevel.Get(variableName, fact.get()); + std::string factoryName = "NoFactory"; + if (currentLevel.GetLevelID() == 0) { + factoryName = "Fine level factory"; + } else { + factoryName = "Coarse level factory"; + } + RCP fact = GetFactory(factoryName); + + GetOStream(Debug) << "Use " << variableName << " of type " << variableType << " from " << factoryName << "(" << fact.get() << ")" << std::endl; + + // check data type + // std::string strType = currentLevel.GetTypeName(variableName, fact.get()); + if (variableType == "int") { + int data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType == "double") { + double data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType == "string") { + std::string data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else { + size_t npos = std::string::npos; + + if (variableType.find("Aggregates") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Graph") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("SmootherBase") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("SmootherPrototype") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Export") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Import") != npos) { + RCP data = currentLevel.Get 
>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Map") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Matrix") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); Set(currentLevel, variableName, data); - } else if (variableType == "double") { - double data = currentLevel.Get(variableName, fact.get()); + } else if (variableType.find("MultiVector") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); Set(currentLevel, variableName, data); - } else if (variableType == "string") { - std::string data = currentLevel.Get(variableName, fact.get()); + } else if (variableType.find("Operator") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); Set(currentLevel, variableName, data); } else { - size_t npos = std::string::npos; - - if (variableType.find("Aggregates") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Graph") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("SmootherBase") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("SmootherPrototype") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Export") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Import") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Map") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, 
variableName, data); - } - else if (variableType.find("Matrix") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("MultiVector") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Operator") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else { - // TAW: is this working with empty procs? - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: cannot detect type of variable " << variableName << " generated by " << fact.get() << ". User provided type " << variableType ); - } + // TAW: is this working with empty procs? + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: cannot detect type of variable " << variableName << " generated by " << fact.get() << ". User provided type " << variableType); } } +} -} //namespace MueLu +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp index 8e28f8fc1c43..55c59db3fc54 100644 --- a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp @@ -54,62 +54,60 @@ #include "MueLu_InitialBlockNumberFactory_fwd.hpp" - namespace MueLu { /*! @class InitialBlockNumberFactory class. @brief Class for generating an initial LocalOrdinal-type BlockNumber vector, based on an input paraemter for interleaved dofs. 
- + */ - template - class InitialBlockNumberFactory : public SingleLevelFactoryBase { +template +class InitialBlockNumberFactory : public SingleLevelFactoryBase { #undef MUELU_INITIALBLOCKNUMBERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - - /*! @brief Constructor. - */ - InitialBlockNumberFactory() { } + public: + //! @name Constructors/Destructors. - //! Destructor. - virtual ~InitialBlockNumberFactory() { } + /*! @brief Constructor. + */ + InitialBlockNumberFactory() {} - RCP GetValidParameterList() const; + //! Destructor. + virtual ~InitialBlockNumberFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Input - //@{ + //@} - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level ¤tLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that generate that data. - //@} + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. 
+ void Build(Level ¤tLevel) const; - private: + //@} - }; // class InitialBlockNumberFactory + private: +}; // class InitialBlockNumberFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_INITIALBLOCKNUMBERFACTORY_SHORT -#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DECL_HPP +#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp index 8cfe22245925..ae774c838857 100644 --- a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp @@ -59,40 +59,40 @@ namespace MueLu { - template - RCP InitialBlockNumberFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP InitialBlockNumberFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); -#undef SET_VALID_ENTRY + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); +#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); - return validParamList; - } + return validParamList; +} - template - void InitialBlockNumberFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } +template +void InitialBlockNumberFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); +} - template - void InitialBlockNumberFactory::Build(Level & currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - const ParameterList & pL = GetParameterList(); +template +void InitialBlockNumberFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", 
currentLevel); + const ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(currentLevel, "A"); - LO blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); + RCP A = Get >(currentLevel, "A"); + LO blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); - GetOStream(Statistics1) << "Generating interleaved blocking with "< BlockNumber = LocalOrdinalVectorFactory::Build(A->getRowMap(),false); - Teuchos::ArrayRCP bn_data = BlockNumber->getDataNonConst(0); - for(LO i=0; i<(LO)A->getRowMap()->getLocalNumElements();i++) - bn_data[i] = i % blocksize; - - Set(currentLevel,"BlockNumber",BlockNumber); - } + GetOStream(Statistics1) << "Generating interleaved blocking with " << blocksize << " equations" << std::endl; + RCP BlockNumber = LocalOrdinalVectorFactory::Build(A->getRowMap(), false); + Teuchos::ArrayRCP bn_data = BlockNumber->getDataNonConst(0); + for (LO i = 0; i < (LO)A->getRowMap()->getLocalNumElements(); i++) + bn_data[i] = i % blocksize; -} // namespace MueLu + Set(currentLevel, "BlockNumber", BlockNumber); +} -#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DEF_HPP +} // namespace MueLu + +#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp index b992723be328..3bbcf7c19fe9 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp @@ -48,8 +48,7 @@ #include "MueLu_SingleLevelFactoryBase.hpp" -namespace MueLu -{ +namespace MueLu { /*! @class InterfaceAggregationFactory class. @@ -118,23 +117,21 @@ namespace MueLu | CoarseDualNodeID2PrimalNodeID | InterfaceAggregationFactory | Coarsened mapping of dual node IDs two primal node IDs. 
*/ -template -class InterfaceAggregationFactory : public SingleLevelFactoryBase -{ + class Node = DefaultNode> +class InterfaceAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_INTERFACEAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" -public: - + public: //! Input //@{ RCP GetValidParameterList() const override; - void DeclareInput(Level ¤tLevel) const override; + void DeclareInput(Level& currentLevel) const override; //@} @@ -142,11 +139,11 @@ class InterfaceAggregationFactory : public SingleLevelFactoryBase //@{ /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const override; + void Build(Level& currentLevel) const override; //@} -private: + private: /*! @brief Build dual aggregates based on a given dual-to-primal node mapping * * @param[in] prefix Prefix for screen output @@ -166,10 +163,9 @@ class InterfaceAggregationFactory : public SingleLevelFactoryBase * @param[in/out] currentLevel Level on which the aggregation needs to be performed */ void BuildBasedOnPrimalInterfaceDofMap(const std::string& prefix, Level& currentLevel) const; - }; -} // namespace MueLu +} // namespace MueLu #define MUELU_INTERFACEAGGREGATIONFACTORY_SHORT #endif /* MUELU_INTERFACEAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp index 4382cca2523b..ff05a39c0b3f 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp @@ -58,75 +58,64 @@ #include "MueLu_InterfaceAggregationFactory_decl.hpp" -namespace MueLu -{ +namespace MueLu { template -RCP InterfaceAggregationFactory::GetValidParameterList() const -{ +RCP InterfaceAggregationFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); validParamList->set>("A", Teuchos::null, "Generating factory of A (matrix block related to dual DOFs)"); 
validParamList->set>("Aggregates", Teuchos::null, "Generating factory of the Aggregates (for block 0,0)"); validParamList->set("Dual/primal mapping strategy", "vague", - "Strategy to represent mapping between dual and primal quantities [node-based, dof-based]"); + "Strategy to represent mapping between dual and primal quantities [node-based, dof-based]"); validParamList->set>("DualNodeID2PrimalNodeID", Teuchos::null, - "Generating factory of the DualNodeID2PrimalNodeID map as input data in a Moertel-compatible std::map to map local IDs of dual nodes to local IDs of primal nodes"); + "Generating factory of the DualNodeID2PrimalNodeID map as input data in a Moertel-compatible std::map to map local IDs of dual nodes to local IDs of primal nodes"); validParamList->set("number of DOFs per dual node", -Teuchos::ScalarTraits::one(), - "Number of DOFs per dual node"); + "Number of DOFs per dual node"); validParamList->set>("Primal interface DOF map", Teuchos::null, - "Generating factory of the primal DOF row map of slave side of the coupling surface"); + "Generating factory of the primal DOF row map of slave side of the coupling surface"); return validParamList; -} // GetValidParameterList() +} // GetValidParameterList() template -void InterfaceAggregationFactory::DeclareInput(Level ¤tLevel) const -{ - Input(currentLevel, "A"); // matrix block of dual variables +void InterfaceAggregationFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); // matrix block of dual variables Input(currentLevel, "Aggregates"); const ParameterList &pL = GetParameterList(); - TEUCHOS_TEST_FOR_EXCEPTION(pL.get("Dual/primal mapping strategy")=="vague", Exceptions::InvalidArgument, - "Strategy for dual/primal mapping not selected. 
Please select one of the available strategies.") - if (pL.get("Dual/primal mapping strategy") == "node-based") - { - if (currentLevel.GetLevelID() == 0) - { + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("Dual/primal mapping strategy") == "vague", Exceptions::InvalidArgument, + "Strategy for dual/primal mapping not selected. Please select one of the available strategies.") + if (pL.get("Dual/primal mapping strategy") == "node-based") { + if (currentLevel.GetLevelID() == 0) { TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("DualNodeID2PrimalNodeID", NoFactory::get()), - Exceptions::RuntimeError, "DualNodeID2PrimalNodeID was not provided by the user on level 0!"); + Exceptions::RuntimeError, "DualNodeID2PrimalNodeID was not provided by the user on level 0!"); currentLevel.DeclareInput("DualNodeID2PrimalNodeID", NoFactory::get(), this); - } - else - { + } else { Input(currentLevel, "DualNodeID2PrimalNodeID"); } - } - else if (pL.get("Dual/primal mapping strategy") == "dof-based") - { + } else if (pL.get("Dual/primal mapping strategy") == "dof-based") { if (currentLevel.GetLevelID() == 0) currentLevel.DeclareInput("Primal interface DOF map", NoFactory::get(), this); else Input(currentLevel, "Primal interface DOF map"); - } - else + } else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, "Unknown strategy for dual/primal mapping.") -} // DeclareInput +} // DeclareInput template -void InterfaceAggregationFactory::Build(Level ¤tLevel) const -{ +void InterfaceAggregationFactory::Build(Level ¤tLevel) const { const std::string prefix = "MueLu::InterfaceAggregationFactory::Build: "; FactoryMonitor m(*this, "Build", currentLevel); // Call a specialized build routine based on the format of user-given input - const ParameterList &pL = GetParameterList(); + const ParameterList &pL = GetParameterList(); const std::string parameterName = "Dual/primal mapping strategy"; if (pL.get(parameterName) == "node-based") BuildBasedOnNodeMapping(prefix, currentLevel); @@ -134,23 
+123,22 @@ void InterfaceAggregationFactory::Bui BuildBasedOnPrimalInterfaceDofMap(prefix, currentLevel); else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, - "MueLu::InterfaceAggregationFactory::Builld(): Unknown strategy for dual/primal mapping. Set a valid value for the parameter \"" << parameterName << "\".") + "MueLu::InterfaceAggregationFactory::Builld(): Unknown strategy for dual/primal mapping. Set a valid value for the parameter \"" << parameterName << "\".") } template -void InterfaceAggregationFactory::BuildBasedOnNodeMapping(const std::string& prefix, - Level ¤tLevel) const -{ +void InterfaceAggregationFactory::BuildBasedOnNodeMapping(const std::string &prefix, + Level ¤tLevel) const { using Dual2Primal_type = std::map; const ParameterList &pL = GetParameterList(); - RCP A = Get>(currentLevel, "A"); + RCP A = Get>(currentLevel, "A"); const LocalOrdinal numDofsPerDualNode = pL.get("number of DOFs per dual node"); - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode::one(), Exceptions::InvalidArgument, - "Number of dual DOFs per node < 0 (default value). Specify a valid \"number of DOFs per dual node\" in the parameter list for the InterfaceAggregationFactory."); + TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode < Teuchos::ScalarTraits::one(), Exceptions::InvalidArgument, + "Number of dual DOFs per node < 0 (default value). 
Specify a valid \"number of DOFs per dual node\" in the parameter list for the InterfaceAggregationFactory."); - RCP primalAggregates = Get>(currentLevel, "Aggregates"); + RCP primalAggregates = Get>(currentLevel, "Aggregates"); ArrayRCP primalVertex2AggId = primalAggregates->GetVertex2AggId()->getData(0); // Get the user-prescribed mapping of dual to primal node IDs @@ -161,21 +149,20 @@ void InterfaceAggregationFactory::Bui mapNodesDualToPrimal = Get>(currentLevel, "DualNodeID2PrimalNodeID"); RCP operatorRangeMap = A->getRangeMap(); - const size_t myRank = operatorRangeMap->getComm()->getRank(); + const size_t myRank = operatorRangeMap->getComm()->getRank(); LocalOrdinal globalNumDualNodes = operatorRangeMap->getGlobalNumElements() / numDofsPerDualNode; - LocalOrdinal localNumDualNodes = operatorRangeMap->getLocalNumElements() / numDofsPerDualNode; + LocalOrdinal localNumDualNodes = operatorRangeMap->getLocalNumElements() / numDofsPerDualNode; TEUCHOS_TEST_FOR_EXCEPTION(localNumDualNodes != Teuchos::as(mapNodesDualToPrimal->size()), - std::runtime_error, prefix << " MueLu requires the range map and the DualNodeID2PrimalNodeID map to be compatible."); + std::runtime_error, prefix << " MueLu requires the range map and the DualNodeID2PrimalNodeID map to be compatible."); RCP dualNodeMap = Teuchos::null; if (numDofsPerDualNode == 1) dualNodeMap = operatorRangeMap; - else - { - GlobalOrdinal indexBase = operatorRangeMap->getIndexBase(); - auto comm = operatorRangeMap->getComm(); + else { + GlobalOrdinal indexBase = operatorRangeMap->getIndexBase(); + auto comm = operatorRangeMap->getComm(); std::vector myDualNodes = {}; for (size_t i = 0; i < operatorRangeMap->getLocalNumElements(); i += numDofsPerDualNode) @@ -184,7 +171,7 @@ void InterfaceAggregationFactory::Bui dualNodeMap = MapFactory::Build(operatorRangeMap->lib(), globalNumDualNodes, myDualNodes, indexBase, comm); } TEUCHOS_TEST_FOR_EXCEPTION(localNumDualNodes != Teuchos::as(dualNodeMap->getLocalNumElements()), 
- std::runtime_error, prefix << " Local number of dual nodes given by user is incompatible to the dual node map."); + std::runtime_error, prefix << " Local number of dual nodes given by user is incompatible to the dual node map."); RCP dualAggregates = rcp(new Aggregates(dualNodeMap)); dualAggregates->setObjectLabel("InterfaceAggregation"); @@ -193,7 +180,7 @@ void InterfaceAggregationFactory::Bui dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors()); ArrayRCP dualVertex2AggId = dualAggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); + ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); RCP coarseMapNodesDualToPrimal = rcp(new Dual2Primal_type()); RCP coarseMapNodesPrimalToDual = rcp(new Dual2Primal_type()); @@ -205,10 +192,9 @@ void InterfaceAggregationFactory::Bui * - assign dual nodes to dual aggregates * - recursively coarsen the dual-to-primal node mapping */ - LocalOrdinal localPrimalNodeID = - Teuchos::ScalarTraits::one(); - LocalOrdinal currentPrimalAggId = - Teuchos::ScalarTraits::one(); - for (LocalOrdinal localDualNodeID = 0; localDualNodeID < localNumDualNodes; ++localDualNodeID) - { + LocalOrdinal localPrimalNodeID = -Teuchos::ScalarTraits::one(); + LocalOrdinal currentPrimalAggId = -Teuchos::ScalarTraits::one(); + for (LocalOrdinal localDualNodeID = 0; localDualNodeID < localNumDualNodes; ++localDualNodeID) { // Get local ID of the primal node associated to the current dual node localPrimalNodeID = (*mapNodesDualToPrimal)[localDualNodeID]; @@ -217,17 +203,16 @@ void InterfaceAggregationFactory::Bui // Test if the current primal aggregate has no associated dual aggregate, yet. // Create new dual aggregate, if necessary. 
- if (coarseMapNodesPrimalToDual->count(currentPrimalAggId) == 0) - { + if (coarseMapNodesPrimalToDual->count(currentPrimalAggId) == 0) { // Associate a new dual aggregate w/ the current primal aggregate - (*coarseMapNodesPrimalToDual)[currentPrimalAggId] = numLocalDualAggregates; + (*coarseMapNodesPrimalToDual)[currentPrimalAggId] = numLocalDualAggregates; (*coarseMapNodesDualToPrimal)[numLocalDualAggregates] = currentPrimalAggId; ++numLocalDualAggregates; } // Fill the dual aggregate dualVertex2AggId[localDualNodeID] = (*coarseMapNodesPrimalToDual)[currentPrimalAggId]; - dualProcWinner[localDualNodeID] = myRank; + dualProcWinner[localDualNodeID] = myRank; } // Store dual aggregeate data as well as coarsening information @@ -235,25 +220,24 @@ void InterfaceAggregationFactory::Bui Set(currentLevel, "Aggregates", dualAggregates); Set(currentLevel, "CoarseDualNodeID2PrimalNodeID", coarseMapNodesDualToPrimal); GetOStream(Statistics1) << dualAggregates->description() << std::endl; -} // BuildBasedOnNodeMapping +} // BuildBasedOnNodeMapping template void InterfaceAggregationFactory::BuildBasedOnPrimalInterfaceDofMap( - const std::string& prefix, Level ¤tLevel) const -{ + const std::string &prefix, Level ¤tLevel) const { const GlobalOrdinal GO_ZERO = Teuchos::ScalarTraits::zero(); - const GlobalOrdinal GO_ONE = Teuchos::ScalarTraits::one(); + const GlobalOrdinal GO_ONE = Teuchos::ScalarTraits::one(); // filled with striding information from A01 - LocalOrdinal numDofsPerDualNode = 0; + LocalOrdinal numDofsPerDualNode = 0; LocalOrdinal numDofsPerPrimalNode = 0; // Grab the off-diagonal block (0,1) from the global blocked operator - RCP A01 = Get>(currentLevel, "A"); - RCP primalAggregates = Get>(currentLevel, "Aggregates"); + RCP A01 = Get>(currentLevel, "A"); + RCP primalAggregates = Get>(currentLevel, "Aggregates"); ArrayRCP primalVertex2AggId = primalAggregates->GetVertex2AggId()->getData(0); - auto comm = A01->getRowMap()->getComm(); + auto comm = 
A01->getRowMap()->getComm(); const int myRank = comm->getRank(); RCP primalInterfaceDofRowMap = Teuchos::null; @@ -266,59 +250,59 @@ void InterfaceAggregationFactory::Bui TEUCHOS_ASSERT(!primalInterfaceDofRowMap.is_null()); if (A01->IsView("stridedMaps") && rcp_dynamic_cast(A01->getRowMap("stridedMaps")) != Teuchos::null) { - auto stridedRowMap = rcp_dynamic_cast(A01->getRowMap("stridedMaps")); - auto stridedColMap = rcp_dynamic_cast(A01->getColMap("stridedMaps")); + auto stridedRowMap = rcp_dynamic_cast(A01->getRowMap("stridedMaps")); + auto stridedColMap = rcp_dynamic_cast(A01->getColMap("stridedMaps")); numDofsPerPrimalNode = Teuchos::as(stridedRowMap->getFixedBlockSize()); - numDofsPerDualNode = Teuchos::as(stridedColMap->getFixedBlockSize()); + numDofsPerDualNode = Teuchos::as(stridedColMap->getFixedBlockSize()); if (numDofsPerPrimalNode != numDofsPerDualNode) { - GetOStream(Warnings) << "InterfaceAggregation attempts to work with " - << numDofsPerPrimalNode << " primal DOFs per node and " << numDofsPerDualNode << " dual DOFs per node." - << "Be careful! Algorithm is not well-tested, if number of primal and dual DOFs per node differ." << std::endl; + GetOStream(Warnings) << "InterfaceAggregation attempts to work with " + << numDofsPerPrimalNode << " primal DOFs per node and " << numDofsPerDualNode << " dual DOFs per node." + << "Be careful! Algorithm is not well-tested, if number of primal and dual DOFs per node differ." << std::endl; } } - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerPrimalNode==0, Exceptions::RuntimeError, - "InterfaceAggregationFactory could not extract the number of primal DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode==0, Exceptions::RuntimeError, - "InterfaceAggregationFactory could not extract the number of dual DOFs per node from striding information. 
At least, make sure that StridedMap information has actually been provided."); + TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerPrimalNode == 0, Exceptions::RuntimeError, + "InterfaceAggregationFactory could not extract the number of primal DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); + TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode == 0, Exceptions::RuntimeError, + "InterfaceAggregationFactory could not extract the number of dual DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); /* Determine block information for primal block - * - * primalDofOffset: global offset of primal DOF GIDs (usually is zero (default)) - * primalBlockDim: block dim for fixed size blocks - * - is 2 or 3 (for 2d or 3d problems) on the finest level (# displacement dofs per node) - * - is 3 or 6 (for 2d or 3d problems) on coarser levels (# nullspace vectors) - */ + * + * primalDofOffset: global offset of primal DOF GIDs (usually is zero (default)) + * primalBlockDim: block dim for fixed size blocks + * - is 2 or 3 (for 2d or 3d problems) on the finest level (# displacement dofs per node) + * - is 3 or 6 (for 2d or 3d problems) on coarser levels (# nullspace vectors) + */ GlobalOrdinal primalDofOffset = GO_ZERO; - LocalOrdinal primalBlockDim = numDofsPerPrimalNode; + LocalOrdinal primalBlockDim = numDofsPerPrimalNode; /* Determine block information for Lagrange multipliers - * - * dualDofOffset: usually > zero (set by domainOffset for Ptent11Fact) - * dualBlockDim: - * - is primalBlockDim (for 2d or 3d problems) on the finest level (1 Lagrange multiplier per - * displacement dof) - * - is 2 or 3 (for 2d or 3d problems) on coarser levels (same as on finest level, whereas there - * are 3 or 6 displacement dofs per node) - */ + * + * dualDofOffset: usually > zero (set by domainOffset for Ptent11Fact) + * dualBlockDim: + * - is primalBlockDim (for 2d or 3d problems) 
on the finest level (1 Lagrange multiplier per + * displacement dof) + * - is 2 or 3 (for 2d or 3d problems) on coarser levels (same as on finest level, whereas there + * are 3 or 6 displacement dofs per node) + */ GlobalOrdinal dualDofOffset = A01->getColMap()->getMinAllGlobalIndex(); - LocalOrdinal dualBlockDim = numDofsPerDualNode; + LocalOrdinal dualBlockDim = numDofsPerDualNode; // Generate global replicated mapping "lagrNodeId -> dispNodeId" - RCP dualDofMap = A01->getDomainMap(); + RCP dualDofMap = A01->getDomainMap(); GlobalOrdinal gMaxDualNodeId = AmalgamationFactory::DOFGid2NodeId( dualDofMap->getMaxAllGlobalIndex(), dualBlockDim, dualDofOffset, dualDofMap->getIndexBase()); GlobalOrdinal gMinDualNodeId = AmalgamationFactory::DOFGid2NodeId( dualDofMap->getMinAllGlobalIndex(), dualBlockDim, dualDofOffset, dualDofMap->getIndexBase()); GetOStream(Runtime1) << " Dual DOF map: index base = " << dualDofMap->getIndexBase() - << ", block dim = " << dualBlockDim - << ", gid offset = " << dualDofOffset - << std::endl; + << ", block dim = " << dualBlockDim + << ", gid offset = " << dualDofOffset + << std::endl; GetOStream(Runtime1) << " [primal / dual] DOFs per node = [" << numDofsPerPrimalNode - << "/" << numDofsPerDualNode << "]" << std::endl; + << "/" << numDofsPerDualNode << "]" << std::endl; // Generate locally replicated vector for mapping dual node IDs to primal node IDs Array dualNodeId2primalNodeId(gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); @@ -333,16 +317,15 @@ void InterfaceAggregationFactory::Bui // Fill mapping of Lagrange Node IDs to displacement aggregate IDs const size_t numMyPrimalInterfaceDOFs = primalInterfaceDofRowMap->getLocalNumElements(); - for (size_t r = 0; r < numMyPrimalInterfaceDOFs; r += numDofsPerPrimalNode) - { + for (size_t r = 0; r < numMyPrimalInterfaceDOFs; r += numDofsPerPrimalNode) { GlobalOrdinal gPrimalRowId = primalInterfaceDofRowMap->getGlobalElement(r); - if (A01->getRowMap()->isNodeGlobalElement(gPrimalRowId)) // Remove 
this if? + if (A01->getRowMap()->isNodeGlobalElement(gPrimalRowId)) // Remove this if? { - const LocalOrdinal lPrimalRowId = A01->getRowMap()->getLocalElement(gPrimalRowId); + const LocalOrdinal lPrimalRowId = A01->getRowMap()->getLocalElement(gPrimalRowId); const GlobalOrdinal gPrimalNodeId = AmalgamationFactory::DOFGid2NodeId(gPrimalRowId, primalBlockDim, primalDofOffset, primalInterfaceDofRowMap->getIndexBase()); - const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode; - const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId]; + const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode; + const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId]; const GlobalOrdinal gDualDofId = A01->getColMap()->getGlobalElement(r); @@ -350,32 +333,30 @@ void InterfaceAggregationFactory::Bui if (local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] == -GO_ONE) { local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId; - local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId; + local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId; } else { GetOStream(Warnings) << "PROC: " << myRank << " gDualNodeId " << gDualNodeId << " is already connected to primal nodeId " - << local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] - << ". Ignore new dispNodeId: " << gPrimalNodeId << std::endl; + << local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] + << ". 
Ignore new dispNodeId: " << gPrimalNodeId << std::endl; } - } } const int dualNodeId2primalNodeIdSize = Teuchos::as(local_dualNodeId2primalNodeId.size()); Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize, - &local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]); + &local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]); Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize, - &local_dualNodeId2primalAggId[0], &dualNodeId2primalAggId[0]); + &local_dualNodeId2primalAggId[0], &dualNodeId2primalAggId[0]); // build node map for dual variables // generate "artificial nodes" for lagrange multipliers // the node map is also used for defining the Aggregates for the lagrange multipliers std::vector dualNodes; - for (size_t r = 0; r < A01->getDomainMap()->getLocalNumElements(); r++) - { + for (size_t r = 0; r < A01->getDomainMap()->getLocalNumElements(); r++) { // determine global Lagrange multiplier row Dof // generate a node id using the grid, lagr_blockdim and lagr_offset // todo make sure, that // nodeId is unique and does not interfer with the displacement nodes - GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r); + GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r); GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0); dualNodes.push_back(gDualNodeId); } @@ -385,7 +366,7 @@ void InterfaceAggregationFactory::Bui // define node map for Lagrange multipliers Teuchos::RCP dualNodeMap = MapFactory::Build(A01->getRowMap()->lib(), - Teuchos::OrdinalTraits::invalid(), dualNodes, A01->getRowMap()->getIndexBase(), comm); + Teuchos::OrdinalTraits::invalid(), dualNodes, A01->getRowMap()->getIndexBase(), comm); // Build aggregates using the lagrange multiplier node map Teuchos::RCP dualAggregates = Teuchos::rcp(new Aggregates(dualNodeMap)); @@ -393,30 +374,29 @@ void InterfaceAggregationFactory::Bui // extract aggregate data 
structures to fill Teuchos::ArrayRCP dualVertex2AggId = dualAggregates->GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); + Teuchos::ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); // loop over local lagrange multiplier node ids LocalOrdinal nLocalAggregates = 0; std::map primalAggId2localDualAggId; - for (size_t lDualNodeID = 0; lDualNodeID < dualNodeMap->getLocalNumElements(); ++lDualNodeID) - { + for (size_t lDualNodeID = 0; lDualNodeID < dualNodeMap->getLocalNumElements(); ++lDualNodeID) { const GlobalOrdinal gDualNodeId = dualNodeMap->getGlobalElement(lDualNodeID); const GlobalOrdinal primalAggId = dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId]; if (primalAggId2localDualAggId.count(primalAggId) == 0) primalAggId2localDualAggId[primalAggId] = nLocalAggregates++; dualVertex2AggId[lDualNodeID] = primalAggId2localDualAggId[primalAggId]; - dualProcWinner[lDualNodeID] = myRank; + dualProcWinner[lDualNodeID] = myRank; } - const LocalOrdinal fullblocksize = numDofsPerDualNode; - const GlobalOrdinal offset = A01->getColMap()->getMinAllGlobalIndex(); - const LocalOrdinal blockid = -1; - const LocalOrdinal nStridedOffset = 0; + const LocalOrdinal fullblocksize = numDofsPerDualNode; + const GlobalOrdinal offset = A01->getColMap()->getMinAllGlobalIndex(); + const LocalOrdinal blockid = -1; + const LocalOrdinal nStridedOffset = 0; const LocalOrdinal stridedblocksize = fullblocksize; RCP> rowTranslation = rcp(new Array()); RCP> colTranslation = rcp(new Array()); - const size_t numMyDualNodes = dualNodeMap->getLocalNumElements(); + const size_t numMyDualNodes = dualNodeMap->getLocalNumElements(); for (size_t lDualNodeID = 0; lDualNodeID < numMyDualNodes; ++lDualNodeID) { for (LocalOrdinal dof = 0; dof < numDofsPerDualNode; ++dof) { rowTranslation->push_back(lDualNodeID); @@ -427,8 +407,8 @@ void InterfaceAggregationFactory::Bui 
TEUCHOS_ASSERT(A01->isFillComplete()); RCP dualAmalgamationInfo = rcp(new AmalgamationInfo(rowTranslation, colTranslation, - A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(), - fullblocksize, offset, blockid, nStridedOffset, stridedblocksize)); + A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(), + fullblocksize, offset, blockid, nStridedOffset, stridedblocksize)); dualAggregates->SetNumAggregates(nLocalAggregates); dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors()); @@ -441,8 +421,8 @@ void InterfaceAggregationFactory::Bui currentLevel.Set("Aggregates", dualAggregates, this); currentLevel.Set("UnAmalgamationInfo", dualAmalgamationInfo, this); -} // BuildBasedOnPrimalInterfaceDofMap +} // BuildBasedOnPrimalInterfaceDofMap -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_INTERFACEAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp index 0c22adb5e443..8da95fcdcc7d 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp @@ -52,8 +52,7 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -namespace MueLu -{ +namespace MueLu { /*! @class InterfaceMappingTransferFactory @@ -73,14 +72,13 @@ namespace MueLu The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see InterfaceAggregationFactory::GetValidParameters).
The * in the @c requested column states that the data is requested as input with all dependencies (see InterfaceAggregationFactory::DeclareInput). */ -template -class InterfaceMappingTransferFactory : public TwoLevelFactoryBase -{ + class Node = DefaultNode> +class InterfaceMappingTransferFactory : public TwoLevelFactoryBase { #undef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" -public: + public: //! Constructor. InterfaceMappingTransferFactory() = default; @@ -92,6 +90,6 @@ class InterfaceMappingTransferFactory : public TwoLevelFactoryBase void Build(Level &fineLevel, Level &coarseLevel) const override; }; -} // namespace MueLu +} // namespace MueLu #define MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT #endif /* MUELU_INTERFACEMAPPINGTRANSFERFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp index 5daab13cbdfb..a5e7f0dd0129 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp @@ -47,32 +47,28 @@ #include "MueLu_InterfaceMappingTransferFactory_decl.hpp" -namespace MueLu -{ +namespace MueLu { template -RCP InterfaceMappingTransferFactory::GetValidParameterList() const -{ +RCP InterfaceMappingTransferFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); validParamList->set>("CoarseDualNodeID2PrimalNodeID", Teuchos::null, "Generating factory of the CoarseDualNodeID2PrimalNodeID map"); return validParamList; } template -void InterfaceMappingTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const -{ +void InterfaceMappingTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { Input(fineLevel, "CoarseDualNodeID2PrimalNodeID"); } template -void InterfaceMappingTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const -{ +void 
InterfaceMappingTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const { Monitor m(*this, "Interface Mapping transfer factory"); RCP> coarseLagr2Dof = Get>>(fineLevel, "CoarseDualNodeID2PrimalNodeID"); Set(coarseLevel, "DualNodeID2PrimalNodeID", coarseLagr2Dof); } -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_INTERFACEMAPPINGTRANSFERFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp index 91b50b0d5c1f..160a2ada8c7d 100644 --- a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp @@ -60,78 +60,78 @@ namespace MueLu { - /*! - @class InverseApproximationFactory class. - @brief Factory for building the approximate inverse of a matrix. +/*! + @class InverseApproximationFactory class. + @brief Factory for building the approximate inverse of a matrix. - ## Context, assumptions, and use cases ## + ## Context, assumptions, and use cases ## - This factory is intended to be used for building an approximate inverse of a given matrix \A. This is for now only - used in the SchurComplementFactory to generate a respective \Ainv matrix. + This factory is intended to be used for building an approximate inverse of a given matrix \A. This is for now only + used in the SchurComplementFactory to generate a respective \Ainv matrix. - For blocked matrices, the InverseApproximationFactory per default generates an approximate inverse of the A_00 term. + For blocked matrices, the InverseApproximationFactory per default generates an approximate inverse of the A_00 term. 
- ## Input/output of this factory ## + ## Input/output of this factory ## - ### User parameters of InterfaceAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the matrix A - inverse: approximation type | string | diagonal | | * | * | Method used to approximate the inverse - inverse: fixing | bool | false | | * | * | Fix diagonal by replacing small entries with 1.0 + ### User parameters of InterfaceAggregationFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the matrix A + inverse: approximation type | string | diagonal | | * | * | Method used to approximate the inverse + inverse: fixing | bool | false | | * | * | Fix diagonal by replacing small entries with 1.0 - The * in the master.xml column denotes that the parameter is defined in the master.xml file. - The * in the validated column means that the parameter is declared in the list of valid input parameters (see GetValidParameters() ). - The * in the requested column states that the data is requested as input with all dependencies (see DeclareInput() ). + The * in the master.xml column denotes that the parameter is defined in the master.xml file. + The * in the validated column means that the parameter is declared in the list of valid input parameters (see GetValidParameters() ). + The * in the requested column states that the data is requested as input with all dependencies (see DeclareInput() ). 
- ### Variables provided by this factory ### + ### Variables provided by this factory ### - After InverseApproximationFactory::Build the following data is available (if requested) + After InverseApproximationFactory::Build the following data is available (if requested) - Parameter | generated by | description - ----------|--------------|------------ - | Ainv | InverseApproximationFactory | The approximate inverse of a given matrix. - */ + Parameter | generated by | description + ----------|--------------|------------ + | Ainv | InverseApproximationFactory | The approximate inverse of a given matrix. +*/ - template - class InverseApproximationFactory : public SingleLevelFactoryBase { +template +class InverseApproximationFactory : public SingleLevelFactoryBase { #undef MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - InverseApproximationFactory() = default; + //! Constructor. + InverseApproximationFactory() = default; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level& currentLevel) const; + //! Build an object with this factory. + void Build(Level& currentLevel) const; - //@} + //@} - private: - //! Sparse inverse calculation method. - RCP GetSparseInverse(const RCP& A, const RCP& sparsityPattern) const; + private: + //! Sparse inverse calculation method. 
+ RCP GetSparseInverse(const RCP& A, const RCP& sparsityPattern) const; - }; // class InverseApproximationFactory +}; // class InverseApproximationFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT #endif /* MUELU_INVERSEAPPROXIMATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp index 7ec193a94d78..30fad853d9ba 100644 --- a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp @@ -69,147 +69,140 @@ namespace MueLu { - template - RCP InverseApproximationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; - - validParamList->set >("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n"); - - validParamList->set ("inverse: approximation type", "diagonal", "Method used to approximate the inverse."); - validParamList->set ("inverse: drop tolerance", 0.0 , "Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active)."); - validParamList->set ("inverse: fixing", false , "Keep diagonal and fix small entries with 1.0"); - - return validParamList; +template +RCP InverseApproximationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; + + validParamList->set>("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n"); + + validParamList->set("inverse: approximation type", "diagonal", "Method used to approximate the inverse."); + validParamList->set("inverse: drop tolerance", 0.0, "Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active)."); + validParamList->set("inverse: fixing", false, "Keep diagonal and fix 
small entries with 1.0"); + + return validParamList; +} + +template +void InverseApproximationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); +} + +template +void InverseApproximationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + using STS = Teuchos::ScalarTraits; + const SC one = STS::one(); + using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; + + const ParameterList& pL = GetParameterList(); + const bool fixing = pL.get("inverse: fixing"); + + // check which approximation type to use + const std::string method = pL.get("inverse: approximation type"); + TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError, + "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or " + "'sparseapproxinverse'."); + + RCP A = Get>(currentLevel, "A"); + RCP bA = Teuchos::rcp_dynamic_cast(A); + const bool isBlocked = (bA == Teuchos::null ? false : true); + + // if blocked operator is used, defaults to A(0,0) + if (isBlocked) A = bA->getMatrix(0, 0); + + const Magnitude tol = pL.get("inverse: drop tolerance"); + RCP Ainv = Teuchos::null; + + if (method == "diagonal") { + const auto diag = VectorFactory::Build(A->getRangeMap(), true); + A->getLocalDiagCopy(*diag); + const RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); + Ainv = MatrixFactory::Build(D); + } else if (method == "lumping") { + const auto diag = Utilities::GetLumpedMatrixDiagonal(*A); + const RCP D = (!fixing ? 
Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); + Ainv = MatrixFactory::Build(D); + } else if (method == "sparseapproxinverse") { + RCP sparsityPattern = Utilities::GetThresholdedGraph(A, tol, A->getGlobalMaxNumRowEntries()); + GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl; + RCP pAinv = GetSparseInverse(A, sparsityPattern); + Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing, pAinv->getGlobalMaxNumRowEntries()); + GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl; } - template - void InverseApproximationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } - - template - void InverseApproximationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - using STS = Teuchos::ScalarTraits; - const SC one = STS::one(); - using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; - - const ParameterList& pL = GetParameterList(); - const bool fixing = pL.get("inverse: fixing"); - - // check which approximation type to use - const std::string method = pL.get("inverse: approximation type"); - TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError, - "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or " - "'sparseapproxinverse'."); - - RCP A = Get >(currentLevel, "A"); - RCP bA = Teuchos::rcp_dynamic_cast(A); - const bool isBlocked = (bA == Teuchos::null ? 
false : true); - - // if blocked operator is used, defaults to A(0,0) - if(isBlocked) A = bA->getMatrix(0,0); - - const Magnitude tol = pL.get("inverse: drop tolerance"); - RCP Ainv = Teuchos::null; - - if(method=="diagonal") - { - const auto diag = VectorFactory::Build(A->getRangeMap(), true); - A->getLocalDiagCopy(*diag); - const RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); - Ainv = MatrixFactory::Build(D); + GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl; + GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl; + + Set(currentLevel, "Ainv", Ainv); +} + +template +RCP> +InverseApproximationFactory::GetSparseInverse(const RCP& Aorg, const RCP& sparsityPattern) const { + // construct the inverse matrix with the given sparsity pattern + RCP Ainv = MatrixFactory::Build(sparsityPattern); + Ainv->resumeFill(); + + // gather missing rows from other procs to generate an overlapping map + RCP rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap()); + RCP A = MatrixFactory::Build(Aorg, *rowImport); + + // loop over all rows of the inverse sparsity pattern (this can be done in parallel) + for (size_t k = 0; k < sparsityPattern->getLocalNumRows(); k++) { + // 1. get column indices Ik of local row k + ArrayView Ik; + sparsityPattern->getLocalRowView(k, Ik); + + // 2. get all local A(Ik,:) rows + Array> J(Ik.size()); + Array> Ak(Ik.size()); + Array Jk; + for (LO i = 0; i < Ik.size(); i++) { + A->getLocalRowView(Ik[i], J[i], Ak[i]); + for (LO j = 0; j < J[i].size(); j++) + Jk.append(J[i][j]); } - else if(method=="lumping") - { - const auto diag = Utilities::GetLumpedMatrixDiagonal(*A); - const RCP D = (!fixing ? 
Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); - Ainv = MatrixFactory::Build(D); - } - else if(method=="sparseapproxinverse") - { - RCP sparsityPattern = Utilities::GetThresholdedGraph(A, tol, A->getGlobalMaxNumRowEntries()); - GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl; - RCP pAinv = GetSparseInverse(A, sparsityPattern); - Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing, pAinv->getGlobalMaxNumRowEntries()); - GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl; - } - - GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl; - GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl; - - Set(currentLevel, "Ainv", Ainv); - } - - template - RCP> - InverseApproximationFactory::GetSparseInverse(const RCP& Aorg, const RCP& sparsityPattern) const { - - // construct the inverse matrix with the given sparsity pattern - RCP Ainv = MatrixFactory::Build(sparsityPattern); - Ainv->resumeFill(); - - // gather missing rows from other procs to generate an overlapping map - RCP rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap()); - RCP A = MatrixFactory::Build(Aorg, *rowImport); - - // loop over all rows of the inverse sparsity pattern (this can be done in parallel) - for(size_t k=0; kgetLocalNumRows(); k++) { - - // 1. get column indices Ik of local row k - ArrayView Ik; - sparsityPattern->getLocalRowView(k, Ik); - - // 2. 
get all local A(Ik,:) rows - Array> J(Ik.size()); - Array> Ak(Ik.size()); - Array Jk; - for (LO i = 0; i < Ik.size(); i++) { - A->getLocalRowView(Ik[i], J[i], Ak[i]); - for (LO j = 0; j < J[i].size(); j++) - Jk.append(J[i][j]); + // set of unique column indices Jk + std::sort(Jk.begin(), Jk.end()); + Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end()); + // create map + std::map G; + for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair(Jk[i], i)); + + // 3. merge rows together + Teuchos::SerialDenseMatrix localA(Jk.size(), Ik.size(), true); + for (LO i = 0; i < Ik.size(); i++) { + for (LO j = 0; j < J[i].size(); j++) { + localA(G.at(J[i][j]), i) = Ak[i][j]; } - // set of unique column indices Jk - std::sort(Jk.begin(), Jk.end()); - Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end()); - // create map - std::map G; - for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair(Jk[i], i)); - - // 3. merge rows together - Teuchos::SerialDenseMatrix localA(Jk.size(), Ik.size(), true); - for (LO i = 0; i < Ik.size(); i++) { - for (LO j = 0; j < J[i].size(); j++) { - localA(G.at(J[i][j]), i) = Ak[i][j]; - } - } - - // 4. get direction-vector - // diagonal needs an entry! - Teuchos::SerialDenseVector ek(Jk.size(), true); - ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits::one();; - - // 5. solve linear system for x - Teuchos::SerialDenseVector localX(Ik.size()); - Teuchos::SerialQRDenseSolver qrSolver; - qrSolver.setMatrix(Teuchos::rcp(&localA, false)); - qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false)); - const int err = qrSolver.solve(); - TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError, - "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve."); - - // 6. set calculated row into Ainv - ArrayView Mk(localX.values(), localX.length()); - Ainv->replaceLocalValues(k, Ik, Mk); - } - Ainv->fillComplete(); - return Ainv; + // 4. get direction-vector + // diagonal needs an entry! 
+ Teuchos::SerialDenseVector ek(Jk.size(), true); + ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits::one(); + ; + + // 5. solve linear system for x + Teuchos::SerialDenseVector localX(Ik.size()); + Teuchos::SerialQRDenseSolver qrSolver; + qrSolver.setMatrix(Teuchos::rcp(&localA, false)); + qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false)); + const int err = qrSolver.solve(); + TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError, + "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve."); + + // 6. set calculated row into Ainv + ArrayView Mk(localX.values(), localX.length()); + Ainv->replaceLocalValues(k, Ik, Mk); } + Ainv->fillComplete(); + + return Ainv; +} -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp index 9a8207ae430a..75f0d2539590 100644 --- a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp @@ -47,9 +47,9 @@ #define MUELU_LINEDETECTIONFACTORY_DECL_HPP // same as in SemiCoarsenPFactory (TODO rework this) -#define VERTICAL 1 -#define HORIZONTAL 2 -#define GRID_SUPPLIED -1 +#define VERTICAL 1 +#define HORIZONTAL 2 +#define GRID_SUPPLIED -1 #include "MueLu_ConfigDefs.hpp" #include "MueLu_LineDetectionFactory_fwd.hpp" @@ -59,82 +59,82 @@ namespace MueLu { - /*! - @class LineDetectionFactory class. - @brief Factory for building line detection information - */ - - template - class LineDetectionFactory : public SingleLevelFactoryBase { +/*! + @class LineDetectionFactory class. 
+ @brief Factory for building line detection information +*/ + +template +class LineDetectionFactory : public SingleLevelFactoryBase { #undef MUELU_LINEDETECTIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + using coordinate_type = typename Teuchos::ScalarTraits::coordinateType; + using CoordinateMultiVector = typename Xpetra::MultiVector; - using coordinate_type = typename Teuchos::ScalarTraits::coordinateType; - using CoordinateMultiVector = typename Xpetra::MultiVector; + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + LineDetectionFactory() + : Zorientation_(VERTICAL) {} - LineDetectionFactory() : Zorientation_(VERTICAL) { } + //! Destructor. + virtual ~LineDetectionFactory() {} - //! Destructor. - virtual ~LineDetectionFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level& currentLevel) const; + //@} - //@} + //! @name Build methods. + //@{ - //! @name Build methods. - //@{ - - /*! - @brief Build method. + /*! + @brief Build method. 
- Builds line detection information and stores it in currentLevel - */ - void Build(Level& currentLevel) const; + Builds line detection information and stores it in currentLevel + */ + void Build(Level& currentLevel) const; - //@} + //@} - private: - void sort_coordinates(LO numCoords, LO* OrigLoc, - coordinate_type* xvals, - coordinate_type* yvals, - coordinate_type* zvals, - coordinate_type* xtemp, - coordinate_type* ytemp, - coordinate_type* ztemp, - bool flipXY = false) const; + private: + void sort_coordinates(LO numCoords, LO* OrigLoc, + coordinate_type* xvals, + coordinate_type* yvals, + coordinate_type* zvals, + coordinate_type* xtemp, + coordinate_type* ytemp, + coordinate_type* ztemp, + bool flipXY = false) const; - LO ML_compute_line_info(LO LayerId[], LO VertLineId[], - LO Ndof, LO DofsPerNode, - LO MeshNumbering, LO NumNodesPerVertLine, - coordinate_type *xvals, coordinate_type *yvals, coordinate_type *zvals, - const Teuchos::Comm& comm ) const ; + LO ML_compute_line_info(LO LayerId[], LO VertLineId[], + LO Ndof, LO DofsPerNode, + LO MeshNumbering, LO NumNodesPerVertLine, + coordinate_type* xvals, coordinate_type* yvals, coordinate_type* zvals, + const Teuchos::Comm& comm) const; - void ML_az_dsort2(coordinate_type dlist[], LO N, LO list2[]) const; + void ML_az_dsort2(coordinate_type dlist[], LO N, LO list2[]) const; - //! internally stores line detection mode - //! can be either vertical, horizontal or coordinates - //! for the first run. On the coarser levels we automatically - //! switch to vertical mode - mutable LO Zorientation_; + //! internally stores line detection mode + //! can be either vertical, horizontal or coordinates + //! for the first run. On the coarser levels we automatically + //! 
switch to vertical mode + mutable LO Zorientation_; - }; //class LineDetectionFactory +}; // class LineDetectionFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_LINEDETECTIONFACTORY_SHORT -#endif // MUELU_LINEDETECTIONFACTORY_DECL_HPP +#endif // MUELU_LINEDETECTIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp index f16534229ce6..d636154f66fa 100644 --- a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp @@ -57,452 +57,454 @@ namespace MueLu { - template - RCP LineDetectionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP LineDetectionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("linedetection: orientation"); - SET_VALID_ENTRY("linedetection: num layers"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for coorindates"); - - return validParamList; - } - - template - void LineDetectionFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - // The factory needs the information about the number of z-layers. While this information is - // provided by the user for the finest level, the factory itself is responsible to provide the - // corresponding information on the coarser levels. Since a factory cannot be dependent on itself - // we use the NoFactory class as generator class, but remove the UserData keep flag, such that - // "NumZLayers" is part of the request/release mechanism. - // Please note, that this prevents us from having several (independent) CoarsePFactory instances! 
- // TODO: allow factory to dependent on self-generated data for TwoLevelFactories -> introduce ExpertRequest/Release in Level - currentLevel.DeclareInput("NumZLayers", NoFactory::get(), this); - currentLevel.RemoveKeepFlag("NumZLayers", NoFactory::get(), MueLu::UserData); + SET_VALID_ENTRY("linedetection: orientation"); + SET_VALID_ENTRY("linedetection: num layers"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("Coordinates", Teuchos::null, "Generating factory for coorindates"); + + return validParamList; +} + +template +void LineDetectionFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + + // The factory needs the information about the number of z-layers. While this information is + // provided by the user for the finest level, the factory itself is responsible to provide the + // corresponding information on the coarser levels. Since a factory cannot be dependent on itself + // we use the NoFactory class as generator class, but remove the UserData keep flag, such that + // "NumZLayers" is part of the request/release mechanism. + // Please note, that this prevents us from having several (independent) CoarsePFactory instances! 
+ // TODO: allow factory to dependent on self-generated data for TwoLevelFactories -> introduce ExpertRequest/Release in Level + currentLevel.DeclareInput("NumZLayers", NoFactory::get(), this); + currentLevel.RemoveKeepFlag("NumZLayers", NoFactory::get(), MueLu::UserData); +} + +template +void LineDetectionFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Line detection (Ray style)", currentLevel); + + LO NumZDir = 0; + RCP fineCoords; + ArrayRCP x, y, z; + coordinate_type *xptr = NULL, *yptr = NULL, *zptr = NULL; + + // obtain general variables + RCP A = Get >(currentLevel, "A"); + LO BlkSize = A->GetFixedBlockSize(); + RCP rowMap = A->getRowMap(); + LO Ndofs = rowMap->getLocalNumElements(); + LO Nnodes = Ndofs / BlkSize; + + // collect information provided by user + const ParameterList& pL = GetParameterList(); + const std::string lineOrientation = pL.get("linedetection: orientation"); + + // interpret "line orientation" parameter provided by the user on the finest level + if (currentLevel.GetLevelID() == 0) { + if (lineOrientation == "vertical") + Zorientation_ = VERTICAL; + else if (lineOrientation == "horizontal") + Zorientation_ = HORIZONTAL; + else if (lineOrientation == "coordinates") + Zorientation_ = GRID_SUPPLIED; + else + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: The parameter 'semicoarsen: line orientation' must be either 'vertical', 'horizontal' or 'coordinates'."); } - template - void LineDetectionFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Line detection (Ray style)", currentLevel); - - LO NumZDir = 0; - RCP fineCoords; - ArrayRCP x, y, z; - coordinate_type *xptr = NULL, *yptr = NULL, *zptr = NULL; - - // obtain general variables - RCP A = Get< RCP > (currentLevel, "A"); - LO BlkSize = A->GetFixedBlockSize(); - RCP rowMap = A->getRowMap(); - LO Ndofs = rowMap->getLocalNumElements(); - LO Nnodes = Ndofs/BlkSize; - - // collect information provided by user - const 
ParameterList& pL = GetParameterList(); - const std::string lineOrientation = pL.get("linedetection: orientation"); - - // interpret "line orientation" parameter provided by the user on the finest level - if(currentLevel.GetLevelID() == 0) { - if(lineOrientation=="vertical") - Zorientation_ = VERTICAL; - else if (lineOrientation=="horizontal") - Zorientation_ = HORIZONTAL; - else if (lineOrientation=="coordinates") - Zorientation_ = GRID_SUPPLIED; - else - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: The parameter 'semicoarsen: line orientation' must be either 'vertical', 'horizontal' or 'coordinates'."); - } - - //TEUCHOS_TEST_FOR_EXCEPTION(Zorientation_!=VERTICAL, Exceptions::RuntimeError, "LineDetectionFactory: The 'horizontal' or 'coordinates' have not been tested!!!. Please remove this exception check and carefully test these modes!"); + // TEUCHOS_TEST_FOR_EXCEPTION(Zorientation_!=VERTICAL, Exceptions::RuntimeError, "LineDetectionFactory: The 'horizontal' or 'coordinates' have not been tested!!!. Please remove this exception check and carefully test these modes!"); - // obtain number of z layers (variable over levels) - // This information is user-provided on the finest level and transferred to the coarser - // levels by the SemiCoarsenPFactor using the internal "NumZLayers" variable. 
- if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { - NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information from Level(0))" << std::endl; - } else { - // check whether user provides information or it can be reconstructed from coordinates - NumZDir = pL.get("linedetection: num layers"); - if(NumZDir == -1) { - bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - - if (CoordsAvail == true) { - // try to reconstruct the number of layers from coordinates - fineCoords = Get< RCP > (currentLevel, "Coordinates"); - TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); - x = fineCoords->getDataNonConst(0); - y = fineCoords->getDataNonConst(1); - z = fineCoords->getDataNonConst(2); - xptr = x.getRawPtr(); - yptr = y.getRawPtr(); - zptr = z.getRawPtr(); - - LO NumCoords = Ndofs/BlkSize; - - /* sort coordinates so that we can order things according to lines */ - Teuchos::ArrayRCP TOrigLoc= Teuchos::arcp(NumCoords); LO* OrigLoc= TOrigLoc.getRawPtr(); - Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); coordinate_type* xtemp = Txtemp.getRawPtr(); - Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); coordinate_type* ytemp = Tytemp.getRawPtr(); - Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); coordinate_type* ztemp = Tztemp.getRawPtr(); - - // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines - // switch x and y coordinates for semi-coarsening... 
- sort_coordinates(NumCoords, OrigLoc, xptr, yptr, zptr, xtemp, ytemp, ztemp, true); - - /* go through each vertical line and populate blockIndices so all */ - /* dofs within a PDE within a vertical line correspond to one block.*/ - LO NumBlocks = 0; - LO NumNodesPerVertLine = 0; - LO index = 0; - - while ( index < NumCoords ) { - coordinate_type xfirst = xtemp[index]; coordinate_type yfirst = ytemp[index]; - LO next = index+1; - while ( (next != NumCoords) && (xtemp[next] == xfirst) && - (ytemp[next] == yfirst)) - next++; - if (NumBlocks == 0) { - NumNodesPerVertLine = next-index; - } - // the number of vertical lines must be the same on all processors - // TAW: Sep 14 2015: or zero as we allow "empty" processors - //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); - NumBlocks++; - index = next; + // obtain number of z layers (variable over levels) + // This information is user-provided on the finest level and transferred to the coarser + // levels by the SemiCoarsenPFactor using the internal "NumZLayers" variable. 
+ if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { + NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); // obtain info + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information from Level(0))" << std::endl; + } else { + // check whether user provides information or it can be reconstructed from coordinates + NumZDir = pL.get("linedetection: num layers"); + if (NumZDir == -1) { + bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); + + if (CoordsAvail == true) { + // try to reconstruct the number of layers from coordinates + fineCoords = Get >(currentLevel, "Coordinates"); + TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); + x = fineCoords->getDataNonConst(0); + y = fineCoords->getDataNonConst(1); + z = fineCoords->getDataNonConst(2); + xptr = x.getRawPtr(); + yptr = y.getRawPtr(); + zptr = z.getRawPtr(); + + LO NumCoords = Ndofs / BlkSize; + + /* sort coordinates so that we can order things according to lines */ + Teuchos::ArrayRCP TOrigLoc = Teuchos::arcp(NumCoords); + LO* OrigLoc = TOrigLoc.getRawPtr(); + Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); + coordinate_type* xtemp = Txtemp.getRawPtr(); + Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); + coordinate_type* ytemp = Tytemp.getRawPtr(); + Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); + coordinate_type* ztemp = Tztemp.getRawPtr(); + + // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines + // switch x and y coordinates for semi-coarsening... 
+ sort_coordinates(NumCoords, OrigLoc, xptr, yptr, zptr, xtemp, ytemp, ztemp, true); + + /* go through each vertical line and populate blockIndices so all */ + /* dofs within a PDE within a vertical line correspond to one block.*/ + LO NumBlocks = 0; + LO NumNodesPerVertLine = 0; + LO index = 0; + + while (index < NumCoords) { + coordinate_type xfirst = xtemp[index]; + coordinate_type yfirst = ytemp[index]; + LO next = index + 1; + while ((next != NumCoords) && (xtemp[next] == xfirst) && + (ytemp[next] == yfirst)) + next++; + if (NumBlocks == 0) { + NumNodesPerVertLine = next - index; } - - NumZDir = NumNodesPerVertLine; - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information reconstructed from provided node coordinates)" << std::endl; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: User has to provide valid number of layers (e.g. using the 'line detection: num layers' parameter)."); + // the number of vertical lines must be the same on all processors + // TAW: Sep 14 2015: or zero as we allow "empty" processors + // TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); + NumBlocks++; + index = next; } + + NumZDir = NumNodesPerVertLine; + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information reconstructed from provided node coordinates)" << std::endl; } else { - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information provided by user through 'line detection: num layers')" << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: User has to provide valid number of layers (e.g. 
using the 'line detection: num layers' parameter)."); } - } // end else (user provides information or can be reconstructed) on finest level - } else { - // coarse level information - // TODO get rid of NoFactory here and use SemiCoarsenPFactory as source of NumZLayers instead. - if(currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { - NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << std::endl; } else { - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: No NumZLayers variable found. This cannot be."); + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information provided by user through 'line detection: num layers')" << std::endl; } + } // end else (user provides information or can be reconstructed) on finest level + } else { + // coarse level information + // TODO get rid of NoFactory here and use SemiCoarsenPFactory as source of NumZLayers instead. + if (currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { + NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); // obtain info + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: No NumZLayers variable found. This cannot be."); } + } - // plausibility check and further variable collection - if (Zorientation_ == GRID_SUPPLIED) { // On finest level, fetch user-provided coordinates if available... 
- bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - - if (CoordsAvail == false) { - if (currentLevel.GetLevelID() == 0) - throw Exceptions::RuntimeError("Coordinates must be supplied if line detection orientation not given."); - else - throw Exceptions::RuntimeError("Coordinates not generated by previous invocation of LineDetectionFactory's BuildP() method."); - } - fineCoords = Get< RCP > (currentLevel, "Coordinates"); - TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); - x = fineCoords->getDataNonConst(0); - y = fineCoords->getDataNonConst(1); - z = fineCoords->getDataNonConst(2); - xptr = x.getRawPtr(); - yptr = y.getRawPtr(); - zptr = z.getRawPtr(); - } + // plausibility check and further variable collection + if (Zorientation_ == GRID_SUPPLIED) { // On finest level, fetch user-provided coordinates if available... + bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - // perform line detection - if (NumZDir > 0) { - LO *LayerId, *VertLineId; - Teuchos::ArrayRCP TLayerId = Teuchos::arcp(Nnodes); LayerId = TLayerId.getRawPtr(); - Teuchos::ArrayRCP TVertLineId= Teuchos::arcp(Nnodes); VertLineId = TVertLineId.getRawPtr(); - - NumZDir = ML_compute_line_info(LayerId, VertLineId, Ndofs, BlkSize, - Zorientation_, NumZDir,xptr,yptr,zptr, *(rowMap->getComm())); - //it is NumZDir=NCLayers*NVertLines*DofsPerNode; - - // store output data on current level - // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_Layers", TLayerId); - Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); - } else { - Teuchos::ArrayRCP TLayerId = Teuchos::arcp(0); - Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(0); - Teuchos::ArrayRCP TVertLineIdSmoo= Teuchos::arcp(0); - - // store output data on current level - 
// The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_Layers", TLayerId); - Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + if (CoordsAvail == false) { + if (currentLevel.GetLevelID() == 0) + throw Exceptions::RuntimeError("Coordinates must be supplied if line detection orientation not given."); + else + throw Exceptions::RuntimeError("Coordinates not generated by previous invocation of LineDetectionFactory's BuildP() method."); } - - // automatically switch to vertical mode on the coarser levels - if(Zorientation_ != VERTICAL) - Zorientation_ = VERTICAL; + fineCoords = Get >(currentLevel, "Coordinates"); + TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); + x = fineCoords->getDataNonConst(0); + y = fineCoords->getDataNonConst(1); + z = fineCoords->getDataNonConst(2); + xptr = x.getRawPtr(); + yptr = y.getRawPtr(); + zptr = z.getRawPtr(); } - template - LocalOrdinal LineDetectionFactory::ML_compute_line_info(LocalOrdinal LayerId[], LocalOrdinal VertLineId[], LocalOrdinal Ndof, LocalOrdinal DofsPerNode, LocalOrdinal MeshNumbering, LocalOrdinal NumNodesPerVertLine, typename Teuchos::ScalarTraits::coordinateType *xvals, typename Teuchos::ScalarTraits::coordinateType *yvals, typename Teuchos::ScalarTraits::coordinateType *zvals, const Teuchos::Comm& /* comm */) const { - - LO Nnodes, NVertLines, MyNode; - LO NumCoords, next; //, subindex, subnext; - coordinate_type xfirst, yfirst; - coordinate_type *xtemp, *ytemp, *ztemp; - LO *OrigLoc; - LO i,j,count; - LO RetVal; + // perform line detection + if (NumZDir > 0) { + LO *LayerId, *VertLineId; + Teuchos::ArrayRCP TLayerId = Teuchos::arcp(Nnodes); + LayerId = TLayerId.getRawPtr(); + Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(Nnodes); + VertLineId = 
TVertLineId.getRawPtr(); + + NumZDir = ML_compute_line_info(LayerId, VertLineId, Ndofs, BlkSize, + Zorientation_, NumZDir, xptr, yptr, zptr, *(rowMap->getComm())); + // it is NumZDir=NCLayers*NVertLines*DofsPerNode; + + // store output data on current level + // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_Layers", TLayerId); + Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + } else { + Teuchos::ArrayRCP TLayerId = Teuchos::arcp(0); + Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(0); + Teuchos::ArrayRCP TVertLineIdSmoo = Teuchos::arcp(0); + + // store output data on current level + // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_Layers", TLayerId); + Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + } - RetVal = 0; - if ((MeshNumbering != VERTICAL) && (MeshNumbering != HORIZONTAL)) { - if ( (xvals == NULL) || (yvals == NULL) || (zvals == NULL)) RetVal = -1; - } - else { - if (NumNodesPerVertLine == -1) RetVal = -4; - if ( ((Ndof/DofsPerNode)%NumNodesPerVertLine) != 0) RetVal = -3; - } - if ( (Ndof%DofsPerNode) != 0) RetVal = -2; + // automatically switch to vertical mode on the coarser levels + if (Zorientation_ != VERTICAL) + Zorientation_ = VERTICAL; +} + +template +LocalOrdinal LineDetectionFactory::ML_compute_line_info(LocalOrdinal LayerId[], LocalOrdinal VertLineId[], LocalOrdinal Ndof, LocalOrdinal DofsPerNode, LocalOrdinal MeshNumbering, LocalOrdinal NumNodesPerVertLine, typename Teuchos::ScalarTraits::coordinateType* xvals, typename Teuchos::ScalarTraits::coordinateType* yvals, typename Teuchos::ScalarTraits::coordinateType* zvals, const Teuchos::Comm& /* comm */) const { + LO Nnodes, NVertLines, MyNode; + LO NumCoords, next; //, subindex, subnext; + 
coordinate_type xfirst, yfirst; + coordinate_type *xtemp, *ytemp, *ztemp; + LO* OrigLoc; + LO i, j, count; + LO RetVal; + + RetVal = 0; + if ((MeshNumbering != VERTICAL) && (MeshNumbering != HORIZONTAL)) { + if ((xvals == NULL) || (yvals == NULL) || (zvals == NULL)) RetVal = -1; + } else { + if (NumNodesPerVertLine == -1) RetVal = -4; + if (((Ndof / DofsPerNode) % NumNodesPerVertLine) != 0) RetVal = -3; + } + if ((Ndof % DofsPerNode) != 0) RetVal = -2; - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -1, Exceptions::RuntimeError, "Not semicoarsening as no mesh numbering information or coordinates are given\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -4, Exceptions::RuntimeError, "Not semicoarsening as the number of z nodes is not given.\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -3, Exceptions::RuntimeError, "Not semicoarsening as the total number of nodes is not evenly divisible by the number of z direction nodes .\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -2, Exceptions::RuntimeError, "Not semicoarsening as something is off with the number of degrees-of-freedom per node.\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -1, Exceptions::RuntimeError, "Not semicoarsening as no mesh numbering information or coordinates are given\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -4, Exceptions::RuntimeError, "Not semicoarsening as the number of z nodes is not given.\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -3, Exceptions::RuntimeError, "Not semicoarsening as the total number of nodes is not evenly divisible by the number of z direction nodes .\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -2, Exceptions::RuntimeError, "Not semicoarsening as something is off with the number of degrees-of-freedom per node.\n"); - Nnodes = Ndof/DofsPerNode; - for (MyNode = 0; MyNode < Nnodes; MyNode++) VertLineId[MyNode] = -1; - for (MyNode = 0; MyNode < Nnodes; MyNode++) LayerId[MyNode] = -1; + Nnodes = Ndof / DofsPerNode; + for (MyNode = 0; MyNode < Nnodes; MyNode++) VertLineId[MyNode] = -1; + for 
(MyNode = 0; MyNode < Nnodes; MyNode++) LayerId[MyNode] = -1; - if (MeshNumbering == VERTICAL) { - for (MyNode = 0; MyNode < Nnodes; MyNode++) { - LayerId[MyNode]= MyNode%NumNodesPerVertLine; - VertLineId[MyNode]= (MyNode- LayerId[MyNode])/NumNodesPerVertLine; - } + if (MeshNumbering == VERTICAL) { + for (MyNode = 0; MyNode < Nnodes; MyNode++) { + LayerId[MyNode] = MyNode % NumNodesPerVertLine; + VertLineId[MyNode] = (MyNode - LayerId[MyNode]) / NumNodesPerVertLine; } - else if (MeshNumbering == HORIZONTAL) { - NVertLines = Nnodes/NumNodesPerVertLine; - for (MyNode = 0; MyNode < Nnodes; MyNode++) { - VertLineId[MyNode] = MyNode%NVertLines; - LayerId[MyNode] = (MyNode- VertLineId[MyNode])/NVertLines; - } - } - else { - // coordinates mode: we distinguish between vertical line numbering for semi-coarsening and line smoothing - NumCoords = Ndof/DofsPerNode; - - // reserve temporary memory - Teuchos::ArrayRCP TOrigLoc= Teuchos::arcp(NumCoords); OrigLoc= TOrigLoc.getRawPtr(); - Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); xtemp = Txtemp.getRawPtr(); - Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); ytemp = Tytemp.getRawPtr(); - Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); ztemp = Tztemp.getRawPtr(); - - // build vertical line info for semi-coarsening - - // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines - // switch x and y coordinates for semi-coarsening... - sort_coordinates(NumCoords, OrigLoc, xvals, yvals, zvals, xtemp, ytemp, ztemp, /*true*/ true); - - LO NumBlocks = 0; - LO index = 0; - - while ( index < NumCoords ) { - xfirst = xtemp[index]; yfirst = ytemp[index]; - next = index+1; - while ( (next != NumCoords) && (xtemp[next] == xfirst) && - (ytemp[next] == yfirst)) - next++; - if (NumBlocks == 0) { - NumNodesPerVertLine = next-index; - } - // The number of vertical lines must be the same on all processors - // TAW: Sep 14, 2015: or zero as we allow for empty processors. 
- //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); - count = 0; - for (j= index; j < next; j++) { - VertLineId[OrigLoc[j]] = NumBlocks; - LayerId[OrigLoc[j]] = count++; - } - NumBlocks++; - index = next; - } + } else if (MeshNumbering == HORIZONTAL) { + NVertLines = Nnodes / NumNodesPerVertLine; + for (MyNode = 0; MyNode < Nnodes; MyNode++) { + VertLineId[MyNode] = MyNode % NVertLines; + LayerId[MyNode] = (MyNode - VertLineId[MyNode]) / NVertLines; } - - /* check that everyone was assigned */ - - for (i = 0; i < Nnodes; i++) { - if (VertLineId[i] == -1) { - GetOStream(Warnings1) << "Warning: did not assign " << i << " to a vertical line?????\n" << std::endl; + } else { + // coordinates mode: we distinguish between vertical line numbering for semi-coarsening and line smoothing + NumCoords = Ndof / DofsPerNode; + + // reserve temporary memory + Teuchos::ArrayRCP TOrigLoc = Teuchos::arcp(NumCoords); + OrigLoc = TOrigLoc.getRawPtr(); + Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); + xtemp = Txtemp.getRawPtr(); + Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); + ytemp = Tytemp.getRawPtr(); + Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); + ztemp = Tztemp.getRawPtr(); + + // build vertical line info for semi-coarsening + + // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines + // switch x and y coordinates for semi-coarsening... 
+ sort_coordinates(NumCoords, OrigLoc, xvals, yvals, zvals, xtemp, ytemp, ztemp, /*true*/ true); + + LO NumBlocks = 0; + LO index = 0; + + while (index < NumCoords) { + xfirst = xtemp[index]; + yfirst = ytemp[index]; + next = index + 1; + while ((next != NumCoords) && (xtemp[next] == xfirst) && + (ytemp[next] == yfirst)) + next++; + if (NumBlocks == 0) { + NumNodesPerVertLine = next - index; } - if (LayerId[i] == -1) { - GetOStream(Warnings1) << "Warning: did not assign " << i << " to a Layer?????\n" << std::endl; + // The number of vertical lines must be the same on all processors + // TAW: Sep 14, 2015: or zero as we allow for empty processors. + // TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); + count = 0; + for (j = index; j < next; j++) { + VertLineId[OrigLoc[j]] = NumBlocks; + LayerId[OrigLoc[j]] = count++; } + NumBlocks++; + index = next; } - - // TAW: Sep 14 2015: relax plausibility checks as we allow for empty processors - //MueLu_maxAll(&comm, NumNodesPerVertLine, i); - //if (NumNodesPerVertLine == -1) NumNodesPerVertLine = i; - //TEUCHOS_TEST_FOR_EXCEPTION(NumNodesPerVertLine != i,Exceptions::RuntimeError, "Different processors have different z direction line lengths?\n"); - - return NumNodesPerVertLine; } - /* Private member function to sort coordinates in arrays. This is an expert routine. 
Do not use or change.*/ - template - void LineDetectionFactory::sort_coordinates(LO numCoords, LO* OrigLoc, - typename Teuchos::ScalarTraits::coordinateType* xvals, - typename Teuchos::ScalarTraits::coordinateType* yvals, - typename Teuchos::ScalarTraits::coordinateType* zvals, - typename Teuchos::ScalarTraits::coordinateType* xtemp, - typename Teuchos::ScalarTraits::coordinateType* ytemp, - typename Teuchos::ScalarTraits::coordinateType* ztemp, - bool flipXY) const { - - if( flipXY == false ) { // for line-smoothing - for (LO i = 0; i < numCoords; i++) xtemp[i]= xvals[i]; - } else { // for semi-coarsening - for (LO i = 0; i < numCoords; i++) xtemp[i]= yvals[i]; - } - for (LO i = 0; i < numCoords; i++) OrigLoc[i]= i; + /* check that everyone was assigned */ - ML_az_dsort2(xtemp,numCoords,OrigLoc); - if( flipXY == false ) { // for line-smoothing - for (LO i = 0; i < numCoords; i++) ytemp[i]= yvals[OrigLoc[i]]; - } else { - for (LO i = 0; i < numCoords; i++) ytemp[i]= xvals[OrigLoc[i]]; + for (i = 0; i < Nnodes; i++) { + if (VertLineId[i] == -1) { + GetOStream(Warnings1) << "Warning: did not assign " << i << " to a vertical line?????\n" + << std::endl; } - - LO index = 0; - - while ( index < numCoords ) { - coordinate_type xfirst = xtemp[index]; - LO next = index+1; - while ( (next != numCoords) && (xtemp[next] == xfirst)) - next++; - ML_az_dsort2(&(ytemp[index]),next-index,&(OrigLoc[index])); - for (LO i = index; i < next; i++) ztemp[i]= zvals[OrigLoc[i]]; - /* One final sort so that the ztemps are in order */ - LO subindex = index; - while (subindex != next) { - coordinate_type yfirst = ytemp[subindex]; - LO subnext = subindex+1; - while ( (subnext != next) && (ytemp[subnext] == yfirst)) subnext++; - ML_az_dsort2(&(ztemp[subindex]),subnext-subindex,&(OrigLoc[subindex])); - subindex = subnext; - } - index = next; + if (LayerId[i] == -1) { + GetOStream(Warnings1) << "Warning: did not assign " << i << " to a Layer?????\n" + << std::endl; } - } - /* Sort coordinates 
and additional array accordingly (if provided). This is an expert routine borrowed from ML. Do not change.*/ - template - void LineDetectionFactory::ML_az_dsort2(typename Teuchos::ScalarTraits::coordinateType dlist[], LocalOrdinal N, LocalOrdinal list2[]) const { - LO l, r, j, i, flag; - LO RR2; - coordinate_type dRR, dK; - - // note: we use that routine for sorting coordinates only. No complex coordinates are assumed... - typedef Teuchos::ScalarTraits STS; - - if (N <= 1) return; - - l = N / 2 + 1; - r = N - 1; - l = l - 1; - dRR = dlist[l - 1]; - dK = dlist[l - 1]; + // TAW: Sep 14 2015: relax plausibility checks as we allow for empty processors + // MueLu_maxAll(&comm, NumNodesPerVertLine, i); + // if (NumNodesPerVertLine == -1) NumNodesPerVertLine = i; + // TEUCHOS_TEST_FOR_EXCEPTION(NumNodesPerVertLine != i,Exceptions::RuntimeError, "Different processors have different z direction line lengths?\n"); + + return NumNodesPerVertLine; +} + +/* Private member function to sort coordinates in arrays. This is an expert routine. 
Do not use or change.*/ +template +void LineDetectionFactory::sort_coordinates(LO numCoords, LO* OrigLoc, + typename Teuchos::ScalarTraits::coordinateType* xvals, + typename Teuchos::ScalarTraits::coordinateType* yvals, + typename Teuchos::ScalarTraits::coordinateType* zvals, + typename Teuchos::ScalarTraits::coordinateType* xtemp, + typename Teuchos::ScalarTraits::coordinateType* ytemp, + typename Teuchos::ScalarTraits::coordinateType* ztemp, + bool flipXY) const { + if (flipXY == false) { // for line-smoothing + for (LO i = 0; i < numCoords; i++) xtemp[i] = xvals[i]; + } else { // for semi-coarsening + for (LO i = 0; i < numCoords; i++) xtemp[i] = yvals[i]; + } + for (LO i = 0; i < numCoords; i++) OrigLoc[i] = i; - if (list2 != NULL) { - RR2 = list2[l - 1]; - while (r != 0) { - j = l; - flag = 1; + ML_az_dsort2(xtemp, numCoords, OrigLoc); + if (flipXY == false) { // for line-smoothing + for (LO i = 0; i < numCoords; i++) ytemp[i] = yvals[OrigLoc[i]]; + } else { + for (LO i = 0; i < numCoords; i++) ytemp[i] = xvals[OrigLoc[i]]; + } - while (flag == 1) { - i = j; - j = j + j; + LO index = 0; + + while (index < numCoords) { + coordinate_type xfirst = xtemp[index]; + LO next = index + 1; + while ((next != numCoords) && (xtemp[next] == xfirst)) + next++; + ML_az_dsort2(&(ytemp[index]), next - index, &(OrigLoc[index])); + for (LO i = index; i < next; i++) ztemp[i] = zvals[OrigLoc[i]]; + /* One final sort so that the ztemps are in order */ + LO subindex = index; + while (subindex != next) { + coordinate_type yfirst = ytemp[subindex]; + LO subnext = subindex + 1; + while ((subnext != next) && (ytemp[subnext] == yfirst)) subnext++; + ML_az_dsort2(&(ztemp[subindex]), subnext - subindex, &(OrigLoc[subindex])); + subindex = subnext; + } + index = next; + } +} + +/* Sort coordinates and additional array accordingly (if provided). This is an expert routine borrowed from ML. 
Do not change.*/ +template +void LineDetectionFactory::ML_az_dsort2(typename Teuchos::ScalarTraits::coordinateType dlist[], LocalOrdinal N, LocalOrdinal list2[]) const { + LO l, r, j, i, flag; + LO RR2; + coordinate_type dRR, dK; + + // note: we use that routine for sorting coordinates only. No complex coordinates are assumed... + typedef Teuchos::ScalarTraits STS; + + if (N <= 1) return; + + l = N / 2 + 1; + r = N - 1; + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + + if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - if (j > r + 1) + if (STS::real(dlist[j - 1]) > STS::real(dK)) { + dlist[i - 1] = dlist[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - - if (STS::real(dlist[j - 1]) > STS::real(dK)) { - dlist[ i - 1] = dlist[ j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - dlist[ i - 1] = dRR; - list2[i - 1] = RR2; - - if (l == 1) { - dRR = dlist [r]; - RR2 = list2[r]; - dK = dlist[r]; - dlist[r ] = dlist[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[ l - 1]; - RR2 = list2[l - 1]; - dK = dlist[l - 1]; - } } - dlist[ 0] = dRR; - list2[0] = RR2; + dlist[i - 1] = dRR; + list2[i - 1] = RR2; + + if (l == 1) { + dRR = dlist[r]; + RR2 = list2[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + RR2 = list2[l - 1]; + dK = dlist[l - 1]; + } } - else { - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; - if (j > r + 1) + dlist[0] = dRR; + list2[0] = RR2; + } else { + while (r != 0) { + j = l; + flag = 1; + while (flag == 1) { + i = j; + j = j + j; + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + 
if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; + if (STS::real(dlist[j - 1]) > STS::real(dK)) { + dlist[i - 1] = dlist[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - if (STS::real(dlist[j - 1]) > STS::real(dK)) { - dlist[ i - 1] = dlist[ j - 1]; - } - else { - flag = 0; - } } } - dlist[ i - 1] = dRR; - if (l == 1) { - dRR = dlist [r]; - dK = dlist[r]; - dlist[r ] = dlist[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[ l - 1]; - dK = dlist[l - 1]; - } } - dlist[ 0] = dRR; + dlist[i - 1] = dRR; + if (l == 1) { + dRR = dlist[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + } } - + dlist[0] = dRR; } -} //namespace MueLu +} +} // namespace MueLu -#endif // MUELU_LINEDETECTIONFACTORY_DEF_HPP +#endif // MUELU_LINEDETECTIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp index 0fb3650ef6a2..c16d867a3444 100644 --- a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp @@ -58,7 +58,7 @@ namespace MueLu { /*! @class LocalOrdinalTransferFactory class. @brief Class for transferring a vector of local ordinals from a finer level to a coarser one, where each aggregate has a unique one - + ## Input/output of LocalOrdinalTransferFactory ## @@ -90,77 +90,77 @@ namespace MueLu { | TransferVec | LocalOrdinalTransferFactory | coarse level transfervec */ - - - template - class LocalOrdinalTransferFactory : public TwoLevelFactoryBase { +template +class LocalOrdinalTransferFactory : public TwoLevelFactoryBase { #undef MUELU_LOCALORDINALTRANSFERFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. 
+ //@{ - // Default constructor is distabled - LocalOrdinalTransferFactory() = delete; + // Default constructor is distabled + LocalOrdinalTransferFactory() = delete; - /*! @brief Constructor. + /*! @brief Constructor. - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - LocalOrdinalTransferFactory(const std::string & TransferVecName, const std::string & mode): TransferVecName_(TransferVecName) { - if(mode == "classical") useAggregatesMode_ = false; - else useAggregatesMode_ = true; - } + The operator associated with projectionName will be applied to the MultiVector associated with + vectorName. + */ + LocalOrdinalTransferFactory(const std::string &TransferVecName, const std::string &mode) + : TransferVecName_(TransferVecName) { + if (mode == "classical") + useAggregatesMode_ = false; + else + useAggregatesMode_ = true; + } - //! Destructor. - virtual ~LocalOrdinalTransferFactory() { } + //! Destructor. + virtual ~LocalOrdinalTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. 
- */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - private: + private: + void BuildAggregates(Level &fineLevel, Level &coarseLevel) const; - void BuildAggregates(Level & fineLevel, Level &coarseLevel) const; + void BuildFC(Level &fineLevel, Level &coarseLevel) const; - void BuildFC(Level & fineLevel, Level &coarseLevel) const; - - //! Use aggregates mode (as opposed to FC mode) - bool useAggregatesMode_; + //! Use aggregates mode (as opposed to FC mode) + bool useAggregatesMode_; - //! The name for the vector to be transfered. This allows us to have multiple factories for different variables - std::string TransferVecName_; + //! The name for the vector to be transfered. 
This allows us to have multiple factories for different variables + std::string TransferVecName_; - }; // class LocalOrdinalTransferFactory +}; // class LocalOrdinalTransferFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_LOCALORDINALTRANSFERFACTORY_SHORT -#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp index d870306b54b6..11228f79d0b3 100644 --- a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp @@ -61,201 +61,196 @@ namespace MueLu { - template - RCP LocalOrdinalTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP LocalOrdinalTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set >(TransferVecName_, Teuchos::null, "Factory for TransferVec generation"); - validParamList->set >("P Graph", Teuchos::null, "Factory for P generation"); - validParamList->set >("Aggregates", Teuchos::null, "Factory for aggregates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + validParamList->set >(TransferVecName_, Teuchos::null, "Factory for TransferVec generation"); + validParamList->set >("P Graph", Teuchos::null, "Factory for P generation"); + validParamList->set >("Aggregates", Teuchos::null, "Factory for aggregates generation"); + validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - return validParamList; - } + return validParamList; +} + +template +void LocalOrdinalTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + static bool isAvailableXfer = false; + if (coarseLevel.GetRequestMode() == Level::REQUEST) { + isAvailableXfer 
= coarseLevel.IsAvailable(TransferVecName_, this); + if (isAvailableXfer == false) { + Input(fineLevel, TransferVecName_); + Input(fineLevel, "CoarseMap"); - template - void LocalOrdinalTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - static bool isAvailableXfer = false; - if (coarseLevel.GetRequestMode() == Level::REQUEST) { - isAvailableXfer = coarseLevel.IsAvailable(TransferVecName_, this); - if (isAvailableXfer == false) { - Input(fineLevel, TransferVecName_); - Input(fineLevel, "CoarseMap"); - - if(useAggregatesMode_) - Input(fineLevel, "Aggregates"); - else { - Input(coarseLevel, "P Graph"); - } + if (useAggregatesMode_) + Input(fineLevel, "Aggregates"); + else { + Input(coarseLevel, "P Graph"); } } - } +} + +template +void LocalOrdinalTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const { + if (useAggregatesMode_) + BuildAggregates(fineLevel, coarseLevel); + else + BuildFC(fineLevel, coarseLevel); +} + +template +void LocalOrdinalTransferFactory::BuildFC(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - template - void LocalOrdinalTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - if(useAggregatesMode_) BuildAggregates(fineLevel,coarseLevel); - else BuildFC(fineLevel,coarseLevel); + GetOStream(Runtime0) << "Transferring " << TransferVecName_ << std::endl; + LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + if (coarseLevel.IsAvailable(TransferVecName_, this)) { + GetOStream(Runtime0) << "Reusing " << TransferVecName_ << std::endl; + return; } - template - void LocalOrdinalTransferFactory::BuildFC(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + // Get everything we need + RCP P = Get >(coarseLevel, "P Graph"); + RCP fineTV = Get >(fineLevel, TransferVecName_); + RCP coarseMap = Get >(fineLevel, "CoarseMap"); + RCP uniqueMap = fineTV->getMap(); + ArrayRCP fineData = fineTV->getData(0); - 
GetOStream(Runtime0) << "Transferring " <::invalid(); + // Allocate new LO Vector + RCP coarseTV = LocalOrdinalVectorFactory::Build(coarseMap, 1); + ArrayRCP coarseData = coarseTV->getDataNonConst(0); - if (coarseLevel.IsAvailable(TransferVecName_, this)) { - GetOStream(Runtime0) << "Reusing "< P = Get< RCP >(coarseLevel,"P Graph"); - RCP fineTV = Get< RCP >(fineLevel, TransferVecName_); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - RCP uniqueMap = fineTV->getMap(); - ArrayRCP fineData = fineTV->getData(0); - - // Allocate new LO Vector - RCP coarseTV = LocalOrdinalVectorFactory::Build(coarseMap,1); - ArrayRCP coarseData = coarseTV->getDataNonConst(0); - - // Invalidate everything first, to check for errors - for(LO i=0; igetDomainMap()->getLocalNumElements(); - for (LO row=0; row<(LO)P->getLocalNumRows(); row++) { - LO fineNumber = fineData[row]; - ArrayView indices; - P->getLocalRowView(row,indices); - - for(LO j=0; j<(LO)indices.size(); j++) { - LO col = indices[j]; - if (col >= domMapNumElements) { - // skip off rank entries of P - } else { - coarseData[col] = fineNumber; - } + // Fill in coarse TV + LO domMapNumElements = P->getDomainMap()->getLocalNumElements(); + for (LO row = 0; row < (LO)P->getLocalNumRows(); row++) { + LO fineNumber = fineData[row]; + ArrayView indices; + P->getLocalRowView(row, indices); + + for (LO j = 0; j < (LO)indices.size(); j++) { + LO col = indices[j]; + if (col >= domMapNumElements) { + // skip off rank entries of P + } else { + coarseData[col] = fineNumber; } } + } #ifdef HAVE_MUELU_DEBUG - size_t error_count = 0; - { - RCP coarseTVghosted; - RCP importer = P->getImporter(); - if (!importer.is_null()) { - coarseTVghosted = LocalOrdinalVectorFactory::Build(P->getColMap(),1); - coarseTVghosted->doImport(*coarseTV, *importer, Xpetra::INSERT); - } else { - coarseTVghosted = coarseTV; - } - ArrayRCP coarseDataGhosted = coarseTVghosted->getDataNonConst(0); - for (LO col=0; col<(LO)P->getColMap()->getLocalNumElements(); 
col++) { - if (coarseDataGhosted[col] == LO_INVALID) + size_t error_count = 0; + { + RCP coarseTVghosted; + RCP importer = P->getImporter(); + if (!importer.is_null()) { + coarseTVghosted = LocalOrdinalVectorFactory::Build(P->getColMap(), 1); + coarseTVghosted->doImport(*coarseTV, *importer, Xpetra::INSERT); + } else { + coarseTVghosted = coarseTV; + } + ArrayRCP coarseDataGhosted = coarseTVghosted->getDataNonConst(0); + for (LO col = 0; col < (LO)P->getColMap()->getLocalNumElements(); col++) { + if (coarseDataGhosted[col] == LO_INVALID) + error_count++; + } + for (LO row = 0; row < (LO)P->getLocalNumRows(); row++) { + LO fineNumber = fineData[row]; + ArrayView indices; + P->getLocalRowView(row, indices); + for (LO j = 0; j < (LO)indices.size(); j++) { + if (coarseDataGhosted[indices[j]] != fineNumber) error_count++; } - for (LO row=0; row<(LO)P->getLocalNumRows(); row++) { - LO fineNumber = fineData[row]; - ArrayView indices; - P->getLocalRowView(row,indices); - for(LO j=0; j<(LO)indices.size(); j++) { - if (coarseDataGhosted[indices[j]] != fineNumber) - error_count++; - } - } } + } - // Error checking: All nodes in an aggregate must share a local ordinal - if(error_count > 0) { - std::ostringstream ofs; - ofs << "LocalOrdinalTransferFactory("< 0) { + std::ostringstream ofs; + ofs << "LocalOrdinalTransferFactory(" << TransferVecName_ << "): ERROR: Each coarse dof must have a unique LO value. 
We had " << std::to_string(error_count) << " unknowns that did not match."; + throw std::runtime_error(ofs.str()); + } #endif - - Set >(coarseLevel, TransferVecName_, coarseTV); + Set >(coarseLevel, TransferVecName_, coarseTV); +} + +template +void LocalOrdinalTransferFactory::BuildAggregates(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + + GetOStream(Runtime0) << "Transferring " << TransferVecName_ << std::endl; + RCP coarseTV; + RCP fineTV; + LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + if (coarseLevel.IsAvailable(TransferVecName_, this)) { + GetOStream(Runtime0) << "Reusing " << TransferVecName_ << std::endl; + return; } - + RCP aggregates = Get >(fineLevel, "Aggregates"); + fineTV = Get >(fineLevel, TransferVecName_); + RCP coarseMap = Get >(fineLevel, "CoarseMap"); + RCP uniqueMap = fineTV->getMap(); - template - void LocalOrdinalTransferFactory::BuildAggregates(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + ArrayView elementAList = coarseMap->getLocalElementList(); - GetOStream(Runtime0) << "Transferring " < coarseTV; - RCP fineTV; - LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + coarseTV = LocalOrdinalVectorFactory::Build(coarseMap, 1); - if (coarseLevel.IsAvailable(TransferVecName_, this)) { - GetOStream(Runtime0) << "Reusing "< aggregates = Get< RCP > (fineLevel, "Aggregates"); - fineTV = Get< RCP >(fineLevel, TransferVecName_); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - RCP uniqueMap = fineTV->getMap(); - - ArrayView elementAList = coarseMap->getLocalElementList(); - - coarseTV = LocalOrdinalVectorFactory::Build(coarseMap,1); - - // Create overlapped fine TV to reduce global communication - RCP ghostedTV = fineTV; - if (aggregates->AggregatesCrossProcessors()) { - - RCP nonUniqueMap = aggregates->GetMap(); - RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - - ghostedTV = 
LocalOrdinalVectorFactory::Build(nonUniqueMap, 1); - ghostedTV->doImport(*fineTV, *importer, Xpetra::INSERT); - } - - // Get some info about aggregates - int myPID = uniqueMap->getComm()->getRank(); - ArrayRCP aggSizes = aggregates->ComputeAggregateSizesArrayRCP(); - const ArrayRCP vertex2AggID = aggregates->GetVertex2AggId()->getData(0); - const ArrayRCP procWinner = aggregates->GetProcWinner()->getData(0); - - - ArrayRCP fineData = ghostedTV->getData(0); - ArrayRCP coarseData = coarseTV->getDataNonConst(0); - - // Invalidate everything first, to check for errors - for(LO i=0; i ghostedTV = fineTV; + if (aggregates->AggregatesCrossProcessors()) { + RCP nonUniqueMap = aggregates->GetMap(); + RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + + ghostedTV = LocalOrdinalVectorFactory::Build(nonUniqueMap, 1); + ghostedTV->doImport(*fineTV, *importer, Xpetra::INSERT); + } + + // Get some info about aggregates + int myPID = uniqueMap->getComm()->getRank(); + ArrayRCP aggSizes = aggregates->ComputeAggregateSizesArrayRCP(); + const ArrayRCP vertex2AggID = aggregates->GetVertex2AggId()->getData(0); + const ArrayRCP procWinner = aggregates->GetProcWinner()->getData(0); - // Error checking: All nodes in an aggregate must share a local ordinal - if(error_count > 0) { - std::ostringstream ofs; - ofs << "LocalOrdinalTransferFactory: ERROR: Each aggregate must have a unique LO value. 
We had "< fineData = ghostedTV->getData(0); + ArrayRCP coarseData = coarseTV->getDataNonConst(0); + + // Invalidate everything first, to check for errors + for (LO i = 0; i < coarseData.size(); i++) + coarseData[i] = LO_INVALID; + + // Fill in coarse TV + size_t error_count = 0; + for (LO lnode = 0; lnode < vertex2AggID.size(); lnode++) { + if (procWinner[lnode] == myPID && + // lnode < vertex2AggID.size() && + lnode < fineData.size() && // TAW do not access off-processor data + vertex2AggID[lnode] < coarseData.size()) { + if (coarseData[vertex2AggID[lnode]] == LO_INVALID) + coarseData[vertex2AggID[lnode]] = fineData[lnode]; + if (coarseData[vertex2AggID[lnode]] != fineData[lnode]) + error_count++; } - - Set >(coarseLevel, TransferVecName_, coarseTV); + } + // Error checking: All nodes in an aggregate must share a local ordinal + if (error_count > 0) { + std::ostringstream ofs; + ofs << "LocalOrdinalTransferFactory: ERROR: Each aggregate must have a unique LO value. We had " << std::to_string(error_count) << " unknowns that did not match."; + throw std::runtime_error(ofs.str()); } -} // namespace MueLu + Set >(coarseLevel, TransferVecName_, coarseTV); +} + +} // namespace MueLu -#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP +#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp index 37ff1ce0e56e..a459dfe4b886 100644 --- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp @@ -56,141 +56,135 @@ namespace MueLu { - /*! - @class LowPrecisionFactory class. - @brief Factory for converting matrices to half precision operators - */ +/*! + @class LowPrecisionFactory class. 
+ @brief Factory for converting matrices to half precision operators +*/ - template - class LowPrecisionFactory : public SingleLevelFactoryBase { +template +class LowPrecisionFactory : public SingleLevelFactoryBase { #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - LowPrecisionFactory() { } + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~LowPrecisionFactory() { } + LowPrecisionFactory() {} - RCP GetValidParameterList() const; + //! Destructor. + virtual ~LowPrecisionFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Build methods. - //@{ + //@} - /*! - @brief Build method. + //! @name Build methods. + //@{ - Converts a matrix to half precision operators and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + /*! + @brief Build method. - //@} + Converts a matrix to half precision operators and returns it in currentLevel. + */ + void Build(Level& currentLevel) const; - }; //class LowPrecisionFactory + //@} +}; // class LowPrecisionFactory #if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) - template - class LowPrecisionFactory : public SingleLevelFactoryBase { - typedef double Scalar; +template +class LowPrecisionFactory : public SingleLevelFactoryBase { + typedef double Scalar; #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! 
Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! + @brief Build method. - Converts a matrix to half precision operators and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + Converts a matrix to half precision operators and returns it in currentLevel. + */ + void Build(Level& currentLevel) const; - //@} + //@} - }; //class LowPrecisionFactory +}; // class LowPrecisionFactory #endif - #if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) - template - class LowPrecisionFactory,LocalOrdinal,GlobalOrdinal,Node> : public SingleLevelFactoryBase { - typedef std::complex Scalar; +template +class LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node> : public SingleLevelFactoryBase { + typedef std::complex Scalar; #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! + @brief Build method. - Converts a matrix to half precision operators and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + Converts a matrix to half precision operators and returns it in currentLevel. 
+ */ + void Build(Level& currentLevel) const; - //@} + //@} - }; //class LowPrecisionFactory +}; // class LowPrecisionFactory #endif - -} //namespace MueLu +} // namespace MueLu #define MUELU_LOWPRECISIONFACTORY_SHORT -#endif // MUELU_LOWPRECISIONFACTORY_DECL_HPP +#endif // MUELU_LOWPRECISIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp index 5182d762e949..32e41c8cc7a4 100644 --- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp @@ -56,142 +56,136 @@ #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" - namespace MueLu { - template - RCP LowPrecisionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; - } +template +RCP LowPrecisionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - template - void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { + validParamList->set("matrix key", "A", ""); + validParamList->set >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, 
matrixKey); - } + return validParamList; +} - template - void LowPrecisionFactory::Build(Level& currentLevel) const { - using Teuchos::ParameterList; +template +void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); +template +void LowPrecisionFactory::Build(Level& currentLevel) const { + using Teuchos::ParameterList; - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); - RCP A = Get< RCP >(currentLevel, matrixKey); + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + RCP A = Get >(currentLevel, matrixKey); + GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; + Set(currentLevel, matrixKey, A); +} #if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) - template - RCP LowPrecisionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; +template +RCP LowPrecisionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory::Build(Level& currentLevel) const { + using Teuchos::ParameterList; + using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; + + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); + + RCP A = Get >(currentLevel, matrixKey); + + if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same::value) { + auto tpA = 
rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); + auto tpLowA = tpA->template convert(); + auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); + auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); + auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); + Set(currentLevel, matrixKey, xpLowOpA); + return; } - template - void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); - } - - template - void LowPrecisionFactory::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same::value) { - auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); - auto tpLowA = tpA->template convert(); - auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); - auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); - auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); - Set(currentLevel, matrixKey, xpLowOpA); - return; - } - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; + Set(currentLevel, matrixKey, A); +} #endif - #if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) - template - RCP LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; - } - - template - void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); +template +RCP LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const { + using Teuchos::ParameterList; + using HalfScalar 
= typename Teuchos::ScalarTraits::halfPrecision; + + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); + + RCP A = Get >(currentLevel, matrixKey); + + if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same >::value) { + auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); + auto tpLowA = tpA->template convert(); + auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); + auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); + auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); + Set(currentLevel, matrixKey, xpLowOpA); + return; } - template - void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same >::value) { - auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); - auto tpLowA = tpA->template convert(); - auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); - auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); - auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); - Set(currentLevel, matrixKey, xpLowOpA); - return; - } - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; + Set(currentLevel, matrixKey, A); +} #endif -} //namespace MueLu +} // namespace MueLu -#endif // MUELU_LOWPRECISIONFACTORY_DEF_HPP +#endif // MUELU_LOWPRECISIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp index 5ca98e15b840..53b93c588e60 100644 --- a/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp @@ -52,89 +52,87 @@ namespace MueLu { - /*! - @class MapTransferFactory class. - @brief Factory to transfer a map from a fine to a coarse level +/*! + @class MapTransferFactory class. + @brief Factory to transfer a map from a fine to a coarse level - Factory that transfers a map (given by a variable name and a generating factory) for building - a coarse version of the map. The coarse map is stored on the coarse level using the same variable name - and generating factory than the original fine level map. + Factory that transfers a map (given by a variable name and a generating factory) for building + a coarse version of the map. The coarse map is stored on the coarse level using the same variable name + and generating factory than the original fine level map. - The transfer is based on the prolongator maps. The prolongator/nullspace can also contain rotational modes, - that might not be of interest for the map of interest. Use the option "nullspace vectors: limit to" to exclude such modes. + The transfer is based on the prolongator maps. The prolongator/nullspace can also contain rotational modes, + that might not be of interest for the map of interest. Use the option "nullspace vectors: limit to" to exclude such modes. 
- ## Input/output ## + ## Input/output ## - ### User parameters ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - map: name | string | "" | | * | * | Name of the map - map: factory | string | "null" | | * | * | Name of the generating factory - P | Factory | null | | * | * | Generating factory of prolongator - nullspace vectors: limit to | string | "all" | | * | * | Use only these nullspace vectors/columns of P to transfer the map (e.g. to drop rotations) + ### User parameters ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + map: name | string | "" | | * | * | Name of the map + map: factory | string | "null" | | * | * | Name of the generating factory + P | Factory | null | | * | * | Generating factory of prolongator + nullspace vectors: limit to | string | "all" | | * | * | Use only these nullspace vectors/columns of P to transfer the map (e.g. to drop rotations) - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see @c GetValidParameters() ).
- The * in the @c requested column states that the data is requested as input with all dependencies (see @c DeclareInput() ). + The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
+ The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see @c GetValidParameters() ).
+ The * in the @c requested column states that the data is requested as input with all dependencies (see @c DeclareInput() ). - ### Variables provided by this factory ### + ### Variables provided by this factory ### - After \c Build() , the following data is available (if requested): + After \c Build() , the following data is available (if requested): - Parameter | generated by | description - ----------|--------------|------------ - | map: name | MapTransferFactory | Coarse version of the input map + Parameter | generated by | description + ----------|--------------|------------ + | map: name | MapTransferFactory | Coarse version of the input map - */ +*/ - template - class MapTransferFactory : public TwoLevelFactoryBase { +template +class MapTransferFactory : public TwoLevelFactoryBase { #undef MUELU_MAPTRANSFERFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" - - public: +#include "MueLu_UseShortNames.hpp" - //! Input - //@{ + public: + //! Input + //@{ - RCP GetValidParameterList() const override; + RCP GetValidParameterList() const override; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const override; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const override; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level& fineLevel, Level& coarseLevel) const override; + //! Build an object with this factory. + void Build(Level& fineLevel, Level& coarseLevel) const override; - //@} + //@} - private: - - /*! - @brief Get the max number of entries per row of P to be considered for map transfer + private: + /*! + @brief Get the max number of entries per row of P to be considered for map transfer - To exclude some nullspace vectors (e.g. rotations in 2D or 3D elasticity), when doing the map transfer, - this routine translates the user wish to a maximal number of entries per row of P to be considered during the map transfer. 
+ To exclude some nullspace vectors (e.g. rotations in 2D or 3D elasticity), when doing the map transfer, + this routine translates the user wish to a maximal number of entries per row of P to be considered during the map transfer. - \warning Rows of P are looped from left to right, so we rely on the usual ordering of the nullspace vectors (translations in x/y/z, then rotations around x,y,z). + \warning Rows of P are looped from left to right, so we rely on the usual ordering of the nullspace vectors (translations in x/y/z, then rotations around x,y,z). - @param[in] pL Parameter list with user-given configuration - @return Number of entries per row of the prolongator to be used for the map transfer - */ - int GetLimitOfProlongatorColumns(const ParameterList& pL) const; + @param[in] pL Parameter list with user-given configuration + @return Number of entries per row of the prolongator to be used for the map transfer + */ + int GetLimitOfProlongatorColumns(const ParameterList& pL) const; - //! Generating factory of input variable - mutable RCP mapFact_; + //! 
Generating factory of input variable + mutable RCP mapFact_; - }; // class MapTransferFactory +}; // class MapTransferFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_MAPTRANSFERFACTORY_SHORT #endif /* MUELU_MAPTRANSFERFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp index b96ee324a0df..cd508ac82a50 100644 --- a/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp @@ -59,140 +59,133 @@ namespace MueLu { - template - RCP MapTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->setEntry("map: name", Teuchos::ParameterEntry(std::string(""))); - validParamList->setEntry("map: factory", Teuchos::ParameterEntry(std::string("null"))); - - validParamList->set>("P", Teuchos::null, "Tentative prolongator factory"); - validParamList->set("nullspace vectors: limit to", "all", "Limit the number of nullspace vectors to be used for the map transfer (especially to exclude rotational vectors)."); - - return validParamList; +template +RCP MapTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->setEntry("map: name", Teuchos::ParameterEntry(std::string(""))); + validParamList->setEntry("map: factory", Teuchos::ParameterEntry(std::string("null"))); + + validParamList->set>("P", Teuchos::null, "Tentative prolongator factory"); + validParamList->set("nullspace vectors: limit to", "all", "Limit the number of nullspace vectors to be used for the map transfer (especially to exclude rotational vectors)."); + + return validParamList; +} + +template +void MapTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + const ParameterList& pL = GetParameterList(); + const std::string mapFactName = pL.get("map: factory"); + const std::string mapName = pL.get("map: 
name"); + + if (fineLevel.GetLevelID() == 0) { + // Not needed, if the map is provided as user data + fineLevel.DeclareInput(mapName, NoFactory::get(), this); + } else { + // check whether user has provided a specific name for the MapFactory + if (mapFactName == "" || mapFactName == "NoFactory") + mapFact_ = MueLu::NoFactory::getRCP(); + else if (mapFactName != "null") + mapFact_ = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); + + // request map generated by mapFact_ + fineLevel.DeclareInput(mapName, mapFact_.get(), this); } - template - void MapTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - const ParameterList & pL = GetParameterList(); - const std::string mapFactName = pL.get("map: factory"); - const std::string mapName = pL.get("map: name"); - - if (fineLevel.GetLevelID() == 0) - { - // Not needed, if the map is provided as user data - fineLevel.DeclareInput(mapName, NoFactory::get(), this); - } - else - { - // check whether user has provided a specific name for the MapFactory - if (mapFactName == "" || mapFactName == "NoFactory") - mapFact_ = MueLu::NoFactory::getRCP(); - else if (mapFactName != "null") - mapFact_ = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); - - // request map generated by mapFact_ - fineLevel.DeclareInput(mapName, mapFact_.get(), this); - } - - // request Ptent - // note that "P" provided by the user (through XML file) is supposed to be of type TentativePFactory - Teuchos::RCP tentPFact = GetFactory("P"); - if (tentPFact == Teuchos::null) - tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); - coarseLevel.DeclareInput("P", tentPFact.get(), this); + // request Ptent + // note that "P" provided by the user (through XML file) is supposed to be of type TentativePFactory + Teuchos::RCP tentPFact = GetFactory("P"); + if (tentPFact == Teuchos::null) + tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); + coarseLevel.DeclareInput("P", tentPFact.get(), this); +} + 
+template +void MapTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { + Monitor m(*this, "MapTransferFactory"); + + const ParameterList& pL = GetParameterList(); + const std::string mapName = pL.get("map: name"); + const int maxNumProlongCols = GetLimitOfProlongatorColumns(pL); + + // fetch map from level + RCP transferMap = Teuchos::null; + if (fineLevel.GetLevelID() == 0) { + transferMap = fineLevel.Get>(mapName, NoFactory::get()); + } else { + if (fineLevel.IsAvailable(mapName, mapFact_.get()) == false) + GetOStream(Runtime0) << "MapTransferFactory::Build: User provided map \"" << mapName << "\" not found in Level class on level " << fineLevel.GetLevelID() << "." << std::endl; + transferMap = fineLevel.Get>(mapName, mapFact_.get()); } - template - void MapTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { - Monitor m(*this, "MapTransferFactory"); - - const ParameterList & pL = GetParameterList(); - const std::string mapName = pL.get("map: name"); - const int maxNumProlongCols = GetLimitOfProlongatorColumns(pL); - - // fetch map from level - RCP transferMap = Teuchos::null; - if (fineLevel.GetLevelID() == 0) { - transferMap = fineLevel.Get>(mapName, NoFactory::get()); - } else { - if (fineLevel.IsAvailable(mapName, mapFact_.get()) == false) - GetOStream(Runtime0) << "MapTransferFactory::Build: User provided map \"" << mapName << "\" not found in Level class on level " << fineLevel.GetLevelID() << "." << std::endl; - transferMap = fineLevel.Get>(mapName, mapFact_.get()); - } - - // Get default tentative prolongator factory - // Getting it that way ensures that the same factory instance will be used for both SaPFactory and NullspaceFactory. 
- RCP tentPFact = GetFactory("P"); - if (tentPFact == Teuchos::null) - tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); - TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P", tentPFact.get()), Exceptions::RuntimeError, - "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available."); - RCP Ptent = coarseLevel.Get >("P", tentPFact.get()); - - // loop over local rows of Ptent and figure out the corresponding coarse GIDs - Array coarseMapGids; - RCP prolongColMap = Ptent->getColMap(); - GO gRowID = -1; - int numColEntries = 0; - for (size_t row = 0; row < Ptent->getLocalNumRows(); ++row) { - gRowID = Ptent->getRowMap()->getGlobalElement(row); - - if (transferMap->isNodeGlobalElement(gRowID)) { - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ptent->getLocalRowView(row, indices, vals); - - numColEntries = as(indices.size()); - if (maxNumProlongCols > 0) - numColEntries = std::min(numColEntries, maxNumProlongCols); - - for (size_t col = 0; col < as(numColEntries); ++col) { - // mark all (selected) columns in Ptent(gRowID,*) to be coarse Dofs of next level transferMap - GO gcid = prolongColMap->getGlobalElement(indices[col]); - coarseMapGids.push_back(gcid); - } + // Get default tentative prolongator factory + // Getting it that way ensures that the same factory instance will be used for both SaPFactory and NullspaceFactory. 
+ RCP tentPFact = GetFactory("P"); + if (tentPFact == Teuchos::null) + tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); + TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P", tentPFact.get()), Exceptions::RuntimeError, + "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available."); + RCP Ptent = coarseLevel.Get>("P", tentPFact.get()); + + // loop over local rows of Ptent and figure out the corresponding coarse GIDs + Array coarseMapGids; + RCP prolongColMap = Ptent->getColMap(); + GO gRowID = -1; + int numColEntries = 0; + for (size_t row = 0; row < Ptent->getLocalNumRows(); ++row) { + gRowID = Ptent->getRowMap()->getGlobalElement(row); + + if (transferMap->isNodeGlobalElement(gRowID)) { + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ptent->getLocalRowView(row, indices, vals); + + numColEntries = as(indices.size()); + if (maxNumProlongCols > 0) + numColEntries = std::min(numColEntries, maxNumProlongCols); + + for (size_t col = 0; col < as(numColEntries); ++col) { + // mark all (selected) columns in Ptent(gRowID,*) to be coarse Dofs of next level transferMap + GO gcid = prolongColMap->getGlobalElement(indices[col]); + coarseMapGids.push_back(gcid); } } - - // build coarse version of the input map - const GO INVALID = Teuchos::OrdinalTraits::invalid(); - std::sort(coarseMapGids.begin(), coarseMapGids.end()); - coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end()); - RCP coarseTransferMap = MapFactory::Build(prolongColMap->lib(), INVALID, coarseMapGids(), - prolongColMap->getIndexBase(), prolongColMap->getComm()); - - // store map in coarse level - if (fineLevel.GetLevelID() == 0) - { - const std::string mapFactName = pL.get("map: factory"); - RCP mapFact = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); - coarseLevel.Set(mapName, coarseTransferMap, mapFact.get()); - } - else - coarseLevel.Set(mapName, coarseTransferMap, mapFact_.get()); - - } - - 
template - int MapTransferFactory::GetLimitOfProlongatorColumns(const ParameterList& pL) const - { - const std::string useTheseNspVectors = pL.get("nullspace vectors: limit to"); - - // Leave right away, if no limit is prescribed by the user - if (useTheseNspVectors == "all" || useTheseNspVectors == "") - return -1; - - // Simplify? Maybe replace by boolean flag "nullspace: exclude rotations" - int maxNumProlongCols = -1; - if (useTheseNspVectors == "translations") - maxNumProlongCols = 1; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::InvalidArgument, "Unknown subset of nullspace vectors to be used, when performing a map transfer.") - - return maxNumProlongCols; } -} // namespace MueLu + // build coarse version of the input map + const GO INVALID = Teuchos::OrdinalTraits::invalid(); + std::sort(coarseMapGids.begin(), coarseMapGids.end()); + coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end()); + RCP coarseTransferMap = MapFactory::Build(prolongColMap->lib(), INVALID, coarseMapGids(), + prolongColMap->getIndexBase(), prolongColMap->getComm()); + + // store map in coarse level + if (fineLevel.GetLevelID() == 0) { + const std::string mapFactName = pL.get("map: factory"); + RCP mapFact = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); + coarseLevel.Set(mapName, coarseTransferMap, mapFact.get()); + } else + coarseLevel.Set(mapName, coarseTransferMap, mapFact_.get()); +} + +template +int MapTransferFactory::GetLimitOfProlongatorColumns(const ParameterList& pL) const { + const std::string useTheseNspVectors = pL.get("nullspace vectors: limit to"); + + // Leave right away, if no limit is prescribed by the user + if (useTheseNspVectors == "all" || useTheseNspVectors == "") + return -1; + + // Simplify? 
Maybe replace by boolean flag "nullspace: exclude rotations" + int maxNumProlongCols = -1; + if (useTheseNspVectors == "translations") + maxNumProlongCols = 1; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::InvalidArgument, "Unknown subset of nullspace vectors to be used, when performing a map transfer.") + + return maxNumProlongCols; +} + +} // namespace MueLu #endif /* MUELU_MAPTRANSFERFACTORY_DEF_HPP_ */ \ No newline at end of file diff --git a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp index d5d4b459d55f..21ad1a067fb0 100644 --- a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp @@ -58,49 +58,45 @@ #include "MueLu_FactoryBase_fwd.hpp" namespace MueLu { - /*! - @class MergedBlockedMatrix - @brief Factory provides a merged version of a blocked matrix - */ - template - class MergedBlockedMatrixFactory : public SingleLevelFactoryBase { +/*! + @class MergedBlockedMatrix + @brief Factory provides a merged version of a blocked matrix +*/ +template +class MergedBlockedMatrixFactory : public SingleLevelFactoryBase { #undef MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - MergedBlockedMatrixFactory(); + MergedBlockedMatrixFactory(); - virtual ~MergedBlockedMatrixFactory() { } - //@} + virtual ~MergedBlockedMatrixFactory() {} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level ¤tLevel) const; - //@} + //! @name Build methods. 
+ //@{ + void Build(Level ¤tLevel) const; + //@} + private: +}; // class MergedBlockedMatrixFactory - - private: - - - }; //class MergedBlockedMatrixFactory - -} //namespace MueLu +} // namespace MueLu #define MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT diff --git a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp index ce3a8cf46d0e..b36bed3f75de 100644 --- a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp @@ -56,28 +56,25 @@ namespace MueLu { template -MergedBlockedMatrixFactory::MergedBlockedMatrixFactory() -{ } +MergedBlockedMatrixFactory::MergedBlockedMatrixFactory() {} template RCP MergedBlockedMatrixFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", MueLu::NoFactory::getRCP()/*Teuchos::null*/, "Generating factory of the matrix A used for building SchurComplement (must be a 2x2 blocked operator, default = MueLu::NoFactory::getRCP())"); + validParamList->set >("A", MueLu::NoFactory::getRCP() /*Teuchos::null*/, "Generating factory of the matrix A used for building SchurComplement (must be a 2x2 blocked operator, default = MueLu::NoFactory::getRCP())"); return validParamList; } - template void MergedBlockedMatrixFactory::DeclareInput(Level ¤tLevel) const { Input(currentLevel, "A"); } template -void MergedBlockedMatrixFactory::Build(Level & currentLevel) const -{ - FactoryMonitor m(*this, "MergedBlockedMatrix", currentLevel); +void MergedBlockedMatrixFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "MergedBlockedMatrix", currentLevel); Teuchos::RCP A = Get >(currentLevel, "A"); RCP bA = Teuchos::rcp_dynamic_cast(A); @@ -93,6 +90,6 @@ void MergedBlockedMatrixFactory::Buil } } -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_MERGEDBLOCKEDMATRIXFACTORY_DEF_HPP_ */ diff --git 
a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp index 75f8fdc123d2..a499fae0ac42 100644 --- a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp @@ -55,71 +55,70 @@ namespace MueLu { - /*! - @class MultiVectorTransferFactory class. - @brief Class for restricting a MultiVector from a finer to a coarser level. - - This is to be used in conjunction with Muelu::RAPFactory::AddTransferFactory(). - */ - - template - class MultiVectorTransferFactory : public TwoLevelFactoryBase { +/*! + @class MultiVectorTransferFactory class. + @brief Class for restricting a MultiVector from a finer to a coarser level. + + This is to be used in conjunction with Muelu::RAPFactory::AddTransferFactory(). +*/ + +template +class MultiVectorTransferFactory : public TwoLevelFactoryBase { #undef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - /*! @brief Constructor. + /*! @brief Constructor. - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - MultiVectorTransferFactory() { } - - MultiVectorTransferFactory(std::string const & vectorName); // deprecated + The operator associated with projectionName will be applied to the MultiVector associated with + vectorName. + */ + MultiVectorTransferFactory() {} - //! Destructor. - virtual ~MultiVectorTransferFactory() { } + MultiVectorTransferFactory(std::string const &vectorName); // deprecated - RCP GetValidParameterList() const; + //! 
Destructor. + virtual ~MultiVectorTransferFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Input - //@{ + //@} - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that generate that data. - //@} + If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class + will fall back to the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. 
+ void Build(Level &fineLevel, Level &coarseLevel) const; - private: + //@} - static ArrayRCP expandCoordinates(ArrayRCP coord, LocalOrdinal blksize); + private: + static ArrayRCP expandCoordinates(ArrayRCP coord, LocalOrdinal blksize); - }; // class MultiVectorTransferFactory +}; // class MultiVectorTransferFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_MULTIVECTORTRANSFERFACTORY_SHORT -#endif // MUELU_MULTIVECTORTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_MULTIVECTORTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp index 4ecf3bedfc09..00c1a91bd7c7 100644 --- a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp @@ -54,73 +54,73 @@ namespace MueLu { - template - RCP MultiVectorTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP MultiVectorTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set< std::string > ("Vector name", "undefined", "Name of the vector that will be transferred on the coarse grid (level key)"); // TODO: how to set a validator without default value? - validParamList->set< RCP >("Vector factory", Teuchos::null, "Factory of the vector"); - validParamList->set< RCP >("R", Teuchos::null, "Factory of the transfer operator (restriction)"); + validParamList->set("Vector name", "undefined", "Name of the vector that will be transferred on the coarse grid (level key)"); // TODO: how to set a validator without default value? 
+ validParamList->set >("Vector factory", Teuchos::null, "Factory of the vector"); + validParamList->set >("R", Teuchos::null, "Factory of the transfer operator (restriction)"); - return validParamList; - } + return validParamList; +} - template - MultiVectorTransferFactory::MultiVectorTransferFactory(std::string const & vectorName) { - SetParameter("Vector name", ParameterEntry(vectorName)); - } +template +MultiVectorTransferFactory::MultiVectorTransferFactory(std::string const &vectorName) { + SetParameter("Vector name", ParameterEntry(vectorName)); +} - template - void MultiVectorTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const ParameterList & pL = GetParameterList(); - std::string vectorName = pL.get("Vector name"); +template +void MultiVectorTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + const ParameterList &pL = GetParameterList(); + std::string vectorName = pL.get("Vector name"); - fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this); - Input(coarseLevel, "R"); - } + fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this); + Input(coarseLevel, "R"); +} - template - void MultiVectorTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); +template +void MultiVectorTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - const ParameterList & pL = GetParameterList(); - std::string vectorName = pL.get("Vector name"); + const ParameterList &pL = GetParameterList(); + std::string vectorName = pL.get("Vector name"); - RCP fineVector = fineLevel.Get< RCP >(vectorName, GetFactory("Vector factory").get()); - RCP transferOp = Get >(coarseLevel, "R"); + RCP fineVector = fineLevel.Get >(vectorName, GetFactory("Vector factory").get()); + RCP transferOp = Get >(coarseLevel, "R"); - RCP coarseVector = 
MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors()); - GetOStream(Runtime0) << "Transferring multivector \"" << vectorName << "\"" << std::endl; + RCP coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors()); + GetOStream(Runtime0) << "Transferring multivector \"" << vectorName << "\"" << std::endl; - RCP onesVector = MultiVectorFactory::Build(transferOp->getDomainMap(), 1); - onesVector->putScalar(Teuchos::ScalarTraits::one()); - RCP rowSumVector = MultiVectorFactory::Build(transferOp->getRangeMap(), 1); - transferOp->apply(*onesVector, *rowSumVector); - transferOp->apply(*fineVector, *coarseVector); + RCP onesVector = MultiVectorFactory::Build(transferOp->getDomainMap(), 1); + onesVector->putScalar(Teuchos::ScalarTraits::one()); + RCP rowSumVector = MultiVectorFactory::Build(transferOp->getRangeMap(), 1); + transferOp->apply(*onesVector, *rowSumVector); + transferOp->apply(*fineVector, *coarseVector); - if (vectorName == "Coordinates") - TEUCHOS_TEST_FOR_EXCEPTION(true,Exceptions::RuntimeError,"Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory."); + if (vectorName == "Coordinates") + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory."); - Set >(coarseLevel, vectorName, coarseVector); + Set >(coarseLevel, vectorName, coarseVector); - } // Build +} // Build - template - ArrayRCP MultiVectorTransferFactory::expandCoordinates(ArrayRCP coordinates, LocalOrdinal blksize) { - if (blksize == 1) - return coordinates; +template +ArrayRCP MultiVectorTransferFactory::expandCoordinates(ArrayRCP coordinates, LocalOrdinal blksize) { + if (blksize == 1) + return coordinates; - ArrayRCP expandCoord(coordinates.size()*blksize); //TODO: how to avoid automatic initialization of the vector? using arcp()? 
+ ArrayRCP expandCoord(coordinates.size() * blksize); // TODO: how to avoid automatic initialization of the vector? using arcp()? - for(int i=0; i - class RAPFactory : public TwoLevelFactoryBase { +/*! + @class RAPFactory + @brief Factory for building coarse matrices. +*/ +template +class RAPFactory : public TwoLevelFactoryBase { #undef MUELU_RAPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - RAPFactory(); + RAPFactory(); - virtual ~RAPFactory() { } + virtual ~RAPFactory() {} - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level& fineLevel, Level& coarseLevel) const; - //@} + //! @name Build methods. + //@{ + void Build(Level& fineLevel, Level& coarseLevel) const; + //@} - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. - Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to - the next coarser level. - */ - void AddTransferFactory(const RCP& factory); + Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to + the next coarser level. + */ + void AddTransferFactory(const RCP& factory); - // TODO add a function to remove a specific transfer factory? + // TODO add a function to remove a specific transfer factory? - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + //! Returns number of transfer factories. 
+ size_t NumTransferFactories() const { return transferFacts_.size(); } - //@} + //@} - private: + private: + //@{ - //@{ - - mutable - bool hasDeclaredInput_; + mutable bool hasDeclaredInput_; - //@} + //@} - //@{ + //@{ - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! list of user-defined transfer Factories + std::vector > transferFacts_; - //@} + //@} - }; //class RAPFactory +}; // class RAPFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_RAPFACTORY_SHORT -#endif // MUELU_RAPFACTORY_DECL_HPP +#endif // MUELU_RAPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp b/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp index 5e67cc295d48..1873c5176e02 100644 --- a/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_RAPFACTORY_DEF_HPP #define MUELU_RAPFACTORY_DEF_HPP - #include #include @@ -67,342 +66,349 @@ namespace MueLu { - template - RAPFactory::RAPFactory() - : hasDeclaredInput_(false) { } +template +RAPFactory::RAPFactory() + : hasDeclaredInput_(false) {} - template - RCP RAPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP RAPFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); - SET_VALID_ENTRY("rap: triple product"); - SET_VALID_ENTRY("rap: fix zero diagonals"); - SET_VALID_ENTRY("rap: fix zero diagonals threshold"); - SET_VALID_ENTRY("rap: fix zero diagonals replacement"); - SET_VALID_ENTRY("rap: relative diagonal floor"); -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("P", null, "Prolongator factory"); - validParamList->set< RCP >("R", null, 
"Restrictor factory"); - - validParamList->set< bool > ("CheckMainDiagonal", false, "Check main diagonal for zeros"); - validParamList->set< bool > ("RepairMainDiagonal", false, "Repair zeros on main diagonal"); - - // Make sure we don't recursively validate options for the matrixmatrix kernels - ParameterList norecurse; - norecurse.disableRecursiveValidation(); - validParamList->set ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); - - return validParamList; - } + SET_VALID_ENTRY("transpose: use implicit"); + SET_VALID_ENTRY("rap: triple product"); + SET_VALID_ENTRY("rap: fix zero diagonals"); + SET_VALID_ENTRY("rap: fix zero diagonals threshold"); + SET_VALID_ENTRY("rap: fix zero diagonals replacement"); + SET_VALID_ENTRY("rap: relative diagonal floor"); +#undef SET_VALID_ENTRY + validParamList->set >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); + validParamList->set >("P", null, "Prolongator factory"); + validParamList->set >("R", null, "Restrictor factory"); + + validParamList->set("CheckMainDiagonal", false, "Check main diagonal for zeros"); + validParamList->set("RepairMainDiagonal", false, "Repair zeros on main diagonal"); + + // Make sure we don't recursively validate options for the matrixmatrix kernels + ParameterList norecurse; + norecurse.disableRecursiveValidation(); + validParamList->set("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); + + return validParamList; +} + +template +void RAPFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + const Teuchos::ParameterList& pL = GetParameterList(); + if (pL.get("transpose: use implicit") == false) + Input(coarseLevel, "R"); + + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); + + 
hasDeclaredInput_ = true; +} + +template +void RAPFactory::Build(Level& fineLevel, Level& coarseLevel) const { + const bool doTranspose = true; + const bool doFillComplete = true; + const bool doOptimizeStorage = true; + RCP Ac; + { + FactoryMonitor m(*this, "Computing Ac", coarseLevel); + std::ostringstream levelstr; + levelstr << coarseLevel.GetLevelID(); + std::string labelstr = FormattingHelper::getColonLabel(coarseLevel.getObjectLabel()); + + TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_ == false, Exceptions::RuntimeError, + "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!"); - template - void RAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { const Teuchos::ParameterList& pL = GetParameterList(); - if (pL.get("transpose: use implicit") == false) - Input(coarseLevel, "R"); - - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - - hasDeclaredInput_ = true; - } - - template - void RAPFactory::Build(Level& fineLevel, Level& coarseLevel) const { - const bool doTranspose = true; - const bool doFillComplete = true; - const bool doOptimizeStorage = true; - RCP Ac; - { - FactoryMonitor m(*this, "Computing Ac", coarseLevel); - std::ostringstream levelstr; - levelstr << coarseLevel.GetLevelID(); - std::string labelstr = FormattingHelper::getColonLabel(coarseLevel.getObjectLabel()); - - TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_ == false, Exceptions::RuntimeError, - "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!"); - - const Teuchos::ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(fineLevel, "A"); - RCP P = Get< RCP >(coarseLevel, "P"), AP; - // We don't have a valid P (e.g., # global aggregates = 0) so we bail. 
- // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize() - if (P == Teuchos::null) { - Ac = Teuchos::null; - Set(coarseLevel, "A", Ac); - return; - } + RCP A = Get >(fineLevel, "A"); + RCP P = Get >(coarseLevel, "P"), AP; + // We don't have a valid P (e.g., # global aggregates = 0) so we bail. + // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize() + if (P == Teuchos::null) { + Ac = Teuchos::null; + Set(coarseLevel, "A", Ac); + return; + } - bool isEpetra = A->getRowMap()->lib() == Xpetra::UseEpetra; - bool isGPU = + bool isEpetra = A->getRowMap()->lib() == Xpetra::UseEpetra; + bool isGPU = #ifdef KOKKOS_ENABLE_CUDA - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) || + (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) || #endif #ifdef KOKKOS_ENABLE_HIP - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) || + (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) || #endif #ifdef KOKKOS_ENABLE_SYCL - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) || + (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) || #endif - false; + false; - if (pL.get("rap: triple product") == false || isEpetra || isGPU) { - if (pL.get("rap: triple product") && isEpetra) - GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for Epetra.\n"; + if (pL.get("rap: triple product") == false || isEpetra || isGPU) { + if (pL.get("rap: triple product") && isEpetra) + GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for Epetra.\n"; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) - if (pL.get("rap: triple product") && isGPU) - GetOStream(Warnings1) << "Switching from triple 
product to R x (A x P) since triple product has not been implemented for " - << Node::execution_space::name() << std::endl; + if (pL.get("rap: triple product") && isGPU) + GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for " + << Node::execution_space::name() << std::endl; #endif - // Reuse pattern if available (multiple solve) - RCP APparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - APparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - // By default, we don't need global constants for A*P - APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false)); - APparams->set("compute global constants",APparams->get("compute global constants",false)); - - if (coarseLevel.IsAvailable("AP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous AP data" << std::endl; - - APparams = coarseLevel.Get< RCP >("AP reuse data", this); - - if (APparams->isParameter("graph")) - AP = APparams->get< RCP >("graph"); - } - - { - SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); - - AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::A*P-")+levelstr.str(), APparams); - } - - // Reuse coarse matrix memory if available (multiple solve) - RCP RAPparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - if (coarseLevel.IsAvailable("RAP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; - - RAPparams = coarseLevel.Get< RCP >("RAP reuse data", this); - - if (RAPparams->isParameter("graph")) - Ac = RAPparams->get< RCP >("graph"); - - // Some eigenvalue may have been 
cached with the matrix in the previous run. - // As the matrix values will be updated, we need to reset the eigenvalue. - Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } - - // We *always* need global constants for the RAP, but not for the temps - RAPparams->set("compute global constants: temporaries",RAPparams->get("compute global constants: temporaries",false)); - RAPparams->set("compute global constants",true); - - // Allow optimization of storage. - // This is necessary for new faster Epetra MM kernels. - // Seems to work with matrix modifications to repair diagonal entries. - - if (pL.get("transpose: use implicit") == true) { - SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); - - Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::R*(AP)-implicit-")+levelstr.str(), RAPparams); - - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - - SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - - Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::R*(AP)-explicit-")+levelstr.str(), RAPparams); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) { - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - } - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) { - using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - magnitudeType threshold; - if (pL.isType("rap: fix zero diagonals threshold")) - threshold = pL.get("rap: fix zero diagonals threshold"); - else - threshold = Teuchos::as(pL.get("rap: fix zero diagonals 
threshold")); - Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); - } - - if (IsPrint(Statistics2)) { - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - } - - if(!Ac.is_null()) {std::ostringstream oss; oss << "A_" << coarseLevel.GetLevelID(); Ac->setObjectLabel(oss.str());} - Set(coarseLevel, "A", Ac); - - if (!isGPU) { - APparams->set("graph", AP); - Set(coarseLevel, "AP reuse data", APparams); - } - if (!isGPU) { - RAPparams->set("graph", Ac); - Set(coarseLevel, "RAP reuse data", RAPparams); - } + // Reuse pattern if available (multiple solve) + RCP APparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + APparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); + + // By default, we don't need global constants for A*P + APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false)); + APparams->set("compute global constants", APparams->get("compute global constants", false)); + + if (coarseLevel.IsAvailable("AP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous AP data" << std::endl; + + APparams = coarseLevel.Get >("AP reuse data", this); + + if (APparams->isParameter("graph")) + AP = APparams->get >("graph"); + } + + { + SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + + AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, labelstr + std::string("MueLu::A*P-") + levelstr.str(), APparams); + } + + // Reuse coarse matrix memory if available (multiple solve) + RCP RAPparams = rcp(new ParameterList); + if 
(pL.isSublist("matrixmatrix: kernel params")) + RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); + + if (coarseLevel.IsAvailable("RAP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; + + RAPparams = coarseLevel.Get >("RAP reuse data", this); + + if (RAPparams->isParameter("graph")) + Ac = RAPparams->get >("graph"); + + // Some eigenvalue may have been cached with the matrix in the previous run. + // As the matrix values will be updated, we need to reset the eigenvalue. + Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } + + // We *always* need global constants for the RAP, but not for the temps + RAPparams->set("compute global constants: temporaries", RAPparams->get("compute global constants: temporaries", false)); + RAPparams->set("compute global constants", true); + + // Allow optimization of storage. + // This is necessary for new faster Epetra MM kernels. + // Seems to work with matrix modifications to repair diagonal entries. 
+ + if (pL.get("transpose: use implicit") == true) { + SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); + + Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, labelstr + std::string("MueLu::R*(AP)-implicit-") + levelstr.str(), RAPparams); + } else { - RCP RAPparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); + RCP R = Get >(coarseLevel, "R"); - if (coarseLevel.IsAvailable("RAP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; + SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - RAPparams = coarseLevel.Get< RCP >("RAP reuse data", this); + Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, labelstr + std::string("MueLu::R*(AP)-explicit-") + levelstr.str(), RAPparams); + } + + Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) { + Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor, GetOStream(Statistics2)); + } + + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) { + using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + magnitudeType threshold; + if (pL.isType("rap: fix zero diagonals threshold")) + threshold = pL.get("rap: fix zero diagonals threshold"); + else + threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); + Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); + Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); 
+ } + + if (IsPrint(Statistics2)) { + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + } - if (RAPparams->isParameter("graph")) - Ac = RAPparams->get< RCP >("graph"); + if (!Ac.is_null()) { + std::ostringstream oss; + oss << "A_" << coarseLevel.GetLevelID(); + Ac->setObjectLabel(oss.str()); + } + Set(coarseLevel, "A", Ac); + + if (!isGPU) { + APparams->set("graph", AP); + Set(coarseLevel, "AP reuse data", APparams); + } + if (!isGPU) { + RAPparams->set("graph", Ac); + Set(coarseLevel, "RAP reuse data", RAPparams); + } + } else { + RCP RAPparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - // Some eigenvalue may have been cached with the matrix in the previous run. - // As the matrix values will be updated, we need to reset the eigenvalue. - Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } + if (coarseLevel.IsAvailable("RAP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; - // We *always* need global constants for the RAP, but not for the temps - RAPparams->set("compute global constants: temporaries",RAPparams->get("compute global constants: temporaries",false)); - RAPparams->set("compute global constants",true); + RAPparams = coarseLevel.Get >("RAP reuse data", this); + + if (RAPparams->isParameter("graph")) + Ac = RAPparams->get >("graph"); + + // Some eigenvalue may have been cached with the matrix in the previous run. + // As the matrix values will be updated, we need to reset the eigenvalue. 
+ Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } - if (pL.get("transpose: use implicit") == true) { + // We *always* need global constants for the RAP, but not for the temps + RAPparams->set("compute global constants: temporaries", RAPparams->get("compute global constants: temporaries", false)); + RAPparams->set("compute global constants", true); - Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as(0)); + if (pL.get("transpose: use implicit") == true) { + Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as(0)); - SubFactoryMonitor m2(*this, "MxMxM: R x A x P (implicit)", coarseLevel); + SubFactoryMonitor m2(*this, "MxMxM: R x A x P (implicit)", coarseLevel); - Xpetra::TripleMatrixMultiply:: + Xpetra::TripleMatrixMultiply:: MultiplyRAP(*P, doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete, - doOptimizeStorage, labelstr+std::string("MueLu::R*A*P-implicit-")+levelstr.str(), - RAPparams); - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as(0)); + doOptimizeStorage, labelstr + std::string("MueLu::R*A*P-implicit-") + levelstr.str(), + RAPparams); + } else { + RCP R = Get >(coarseLevel, "R"); + Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as(0)); - SubFactoryMonitor m2(*this, "MxMxM: R x A x P (explicit)", coarseLevel); + SubFactoryMonitor m2(*this, "MxMxM: R x A x P (explicit)", coarseLevel); - Xpetra::TripleMatrixMultiply:: + Xpetra::TripleMatrixMultiply:: MultiplyRAP(*R, !doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete, - doOptimizeStorage, labelstr+std::string("MueLu::R*A*P-explicit-")+levelstr.str(), + doOptimizeStorage, labelstr + std::string("MueLu::R*A*P-explicit-") + levelstr.str(), RAPparams); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) { - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - } - - bool repairZeroDiagonals = 
pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) { - using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - magnitudeType threshold; - if (pL.isType("rap: fix zero diagonals threshold")) - threshold = pL.get("rap: fix zero diagonals threshold"); - else - threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); - Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); - } - - - if (IsPrint(Statistics2)) { - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - } - - if(!Ac.is_null()) {std::ostringstream oss; oss << "A_" << coarseLevel.GetLevelID(); Ac->setObjectLabel(oss.str());} - Set(coarseLevel, "A", Ac); - - if (!isGPU) { - RAPparams->set("graph", Ac); - Set(coarseLevel, "RAP reuse data", RAPparams); - } } + Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) { + Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor, GetOStream(Statistics2)); + } - } + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) { + using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + magnitudeType threshold; + if (pL.isType("rap: fix zero diagonals threshold")) + threshold = pL.get("rap: fix zero diagonals threshold"); + else + threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); + Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); + 
Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); + } -#ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(*Ac); -#endif // HAVE_MUELU_DEBUG - - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); - // Coordinates transfer is marginally different from all other operations - // because it is *optional*, and not required. For instance, we may need - // coordinates only on level 4 if we start repartitioning from that level, - // but we don't need them on level 1,2,3. As our current Hierarchy setup - // assumes propagation of dependencies only through three levels, this - // means that we need to rely on other methods to propagate optional data. - // - // The method currently used is through RAP transfer factories, which are - // simply factories which are called at the end of RAP with a single goal: - // transfer some fine data to coarser level. Because these factories are - // kind of outside of the mainline factories, they behave different. In - // particular, we call their Build method explicitly, rather than through - // Get calls. This difference is significant, as the Get call is smart - // enough to know when to release all factory dependencies, and Build is - // dumb. This led to the following CoordinatesTransferFactory sequence: - // 1. Request level 0 - // 2. Request level 1 - // 3. Request level 0 - // 4. Release level 0 - // 5. Release level 1 - // - // The problem is missing "6. Release level 0". Because it was missing, - // we had outstanding request on "Coordinates", "Aggregates" and - // "CoarseMap" on level 0. 
- // - // This was fixed by explicitly calling Release on transfer factories in - // RAPFactory. I am still unsure how exactly it works, but now we have - // clear data requests for all levels. - coarseLevel.Release(*fac); + if (IsPrint(Statistics2)) { + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); } - } + if (!Ac.is_null()) { + std::ostringstream oss; + oss << "A_" << coarseLevel.GetLevelID(); + Ac->setObjectLabel(oss.str()); + } + Set(coarseLevel, "A", Ac); + + if (!isGPU) { + RAPparams->set("graph", Ac); + Set(coarseLevel, "RAP reuse data", RAPparams); + } + } } - template - void RAPFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, - "MueLu::RAPFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. " - "This is very strange. 
(Note: you can remove this exception if there's a good reason for)"); - TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_, Exceptions::RuntimeError, "MueLu::RAPFactory::AddTransferFactory: Factory is being added after we have already declared input"); - transferFacts_.push_back(factory); +#ifdef HAVE_MUELU_DEBUG + MatrixUtils::checkLocalRowMapMatchesColMap(*Ac); +#endif // HAVE_MUELU_DEBUG + + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m(*this, "Projections", coarseLevel); + + // call Build of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl; + fac->CallBuild(coarseLevel); + // Coordinates transfer is marginally different from all other operations + // because it is *optional*, and not required. For instance, we may need + // coordinates only on level 4 if we start repartitioning from that level, + // but we don't need them on level 1,2,3. As our current Hierarchy setup + // assumes propagation of dependencies only through three levels, this + // means that we need to rely on other methods to propagate optional data. + // + // The method currently used is through RAP transfer factories, which are + // simply factories which are called at the end of RAP with a single goal: + // transfer some fine data to coarser level. Because these factories are + // kind of outside of the mainline factories, they behave different. In + // particular, we call their Build method explicitly, rather than through + // Get calls. This difference is significant, as the Get call is smart + // enough to know when to release all factory dependencies, and Build is + // dumb. This led to the following CoordinatesTransferFactory sequence: + // 1. Request level 0 + // 2. Request level 1 + // 3. Request level 0 + // 4. Release level 0 + // 5. 
Release level 1 + // + // The problem is missing "6. Release level 0". Because it was missing, + // we had outstanding request on "Coordinates", "Aggregates" and + // "CoarseMap" on level 0. + // + // This was fixed by explicitly calling Release on transfer factories in + // RAPFactory. I am still unsure how exactly it works, but now we have + // clear data requests for all levels. + coarseLevel.Release(*fac); + } } +} + +template +void RAPFactory::AddTransferFactory(const RCP& factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, + "MueLu::RAPFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. " + "This is very strange. (Note: you can remove this exception if there's a good reason for)"); + TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_, Exceptions::RuntimeError, "MueLu::RAPFactory::AddTransferFactory: Factory is being added after we have already declared input"); + transferFacts_.push_back(factory); +} -} //namespace MueLu +} // namespace MueLu #define MUELU_RAPFACTORY_SHORT -#endif // MUELU_RAPFACTORY_DEF_HPP +#endif // MUELU_RAPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp index 91cbf254d6fe..827e81695a2d 100644 --- a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp @@ -62,90 +62,88 @@ #include "MueLu_TwoLevelFactoryBase.hpp" namespace MueLu { - /*! - @class RAPShiftFactory - @brief Factory for building coarse grid matrices, when the matrix - is of the form K+a*M. Useful when you want to change the shift - variable ("a") at every level. Each level must store the stiffness - matrix K and mass matrix M separately. - */ - template - class RAPShiftFactory : public TwoLevelFactoryBase { +/*! 
+ @class RAPShiftFactory + @brief Factory for building coarse grid matrices, when the matrix + is of the form K+a*M. Useful when you want to change the shift + variable ("a") at every level. Each level must store the stiffness + matrix K and mass matrix M separately. +*/ +template +class RAPShiftFactory : public TwoLevelFactoryBase { #undef MUELU_RAPSHIFTFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - RAPShiftFactory(); + public: + //! @name Constructors/Destructors. + //@{ - virtual ~RAPShiftFactory() { } + RAPShiftFactory(); - //@} + virtual ~RAPShiftFactory() {} - //! @name Input - //@{ + //@} - RCP GetValidParameterList() const; + //! @name Input + //@{ - void DeclareInput(Level &fineLevel, Level &coarseLevel) const; + RCP GetValidParameterList() const; - //@} + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - //! @name Build methods. - //@{ - void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - //! @name Handling of user-defined transfer factories - //@{ + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const; + //@} - //! Indicate that the restriction operator action should be implicitly defined by the transpose of the prolongator. - void SetImplicitTranspose(bool const &implicit) { - implicitTranspose_ = implicit; - } + //! @name Handling of user-defined transfer factories + //@{ - void SetShifts(std::vector& shifts) { - shifts_.clear(); - shifts_ = shifts; - } + //! Indicate that the restriction operator action should be implicitly defined by the transpose of the prolongator. + void SetImplicitTranspose(bool const &implicit) { + implicitTranspose_ = implicit; + } - //@} + void SetShifts(std::vector &shifts) { + shifts_.clear(); + shifts_ = shifts; + } - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. 
+ //@} - Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to - the next coarser level. - */ - void AddTransferFactory(const RCP& factory); + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. - // TODO add a function to remove a specific transfer factory? - - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to + the next coarser level. + */ + void AddTransferFactory(const RCP &factory); - //@} + // TODO add a function to remove a specific transfer factory? - private: + //! Returns number of transfer factories. + size_t NumTransferFactories() const { return transferFacts_.size(); } - //! If true, the action of the restriction operator action is implicitly defined by the transpose of the prolongator. - bool implicitTranspose_; + //@} + private: + //! If true, the action of the restriction operator action is implicitly defined by the transpose of the prolongator. + bool implicitTranspose_; - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! 
list of user-defined transfer Factories + std::vector > transferFacts_; - // vector of shifting terms - std::vector shifts_; + // vector of shifting terms + std::vector shifts_; - }; //class RAPShiftFactory +}; // class RAPShiftFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_RAPSHIFTFACTORY_SHORT -#endif // MUELU_RAPSHIFTFACTORY_DECL_HPP +#endif // MUELU_RAPSHIFTFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp index b3c1c0833bb4..b3670f00b788 100644 --- a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp @@ -54,7 +54,6 @@ #include #include - #include "MueLu_RAPShiftFactory_decl.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" @@ -62,338 +61,342 @@ namespace MueLu { - /*********************************************************************************************************/ - template - RAPShiftFactory::RAPShiftFactory() - : implicitTranspose_(false) { } - +/*********************************************************************************************************/ +template +RAPShiftFactory::RAPShiftFactory() + : implicitTranspose_(false) {} - /*********************************************************************************************************/ - template - RCP RAPShiftFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +/*********************************************************************************************************/ +template +RCP RAPShiftFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); - SET_VALID_ENTRY("rap: fix zero diagonals"); - SET_VALID_ENTRY("rap: shift"); - SET_VALID_ENTRY("rap: shift array"); - SET_VALID_ENTRY("rap: cfl array"); - 
SET_VALID_ENTRY("rap: shift diagonal M"); - SET_VALID_ENTRY("rap: shift low storage"); - SET_VALID_ENTRY("rap: relative diagonal floor"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("M", Teuchos::null, "Generating factory of the matrix M used during the non-Galerkin RAP"); - validParamList->set< RCP >("Mdiag", Teuchos::null, "Generating factory of the matrix Mdiag used during the non-Galerkin RAP"); - validParamList->set< RCP >("K", Teuchos::null, "Generating factory of the matrix K used during the non-Galerkin RAP"); - validParamList->set< RCP >("P", Teuchos::null, "Prolongator factory"); - validParamList->set< RCP >("R", Teuchos::null, "Restrictor factory"); - - validParamList->set< bool > ("CheckMainDiagonal", false, "Check main diagonal for zeros"); - validParamList->set< bool > ("RepairMainDiagonal", false, "Repair zeros on main diagonal"); - - validParamList->set > ("deltaT", Teuchos::null, "user deltaT"); - validParamList->set > ("cfl", Teuchos::null, "user cfl"); - validParamList->set > ("cfl-based shift array", Teuchos::null, "MueLu-generated shift array for CFL-based shifting"); - - // Make sure we don't recursively validate options for the matrixmatrix kernels - ParameterList norecurse; - norecurse.disableRecursiveValidation(); - validParamList->set ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); - - return validParamList; + SET_VALID_ENTRY("transpose: use implicit"); + SET_VALID_ENTRY("rap: fix zero diagonals"); + SET_VALID_ENTRY("rap: shift"); + SET_VALID_ENTRY("rap: shift array"); + SET_VALID_ENTRY("rap: cfl array"); + SET_VALID_ENTRY("rap: shift diagonal M"); + SET_VALID_ENTRY("rap: shift low storage"); + SET_VALID_ENTRY("rap: relative diagonal floor"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A used during the 
prolongator smoothing process"); + validParamList->set >("M", Teuchos::null, "Generating factory of the matrix M used during the non-Galerkin RAP"); + validParamList->set >("Mdiag", Teuchos::null, "Generating factory of the matrix Mdiag used during the non-Galerkin RAP"); + validParamList->set >("K", Teuchos::null, "Generating factory of the matrix K used during the non-Galerkin RAP"); + validParamList->set >("P", Teuchos::null, "Prolongator factory"); + validParamList->set >("R", Teuchos::null, "Restrictor factory"); + + validParamList->set("CheckMainDiagonal", false, "Check main diagonal for zeros"); + validParamList->set("RepairMainDiagonal", false, "Repair zeros on main diagonal"); + + validParamList->set >("deltaT", Teuchos::null, "user deltaT"); + validParamList->set >("cfl", Teuchos::null, "user cfl"); + validParamList->set >("cfl-based shift array", Teuchos::null, "MueLu-generated shift array for CFL-based shifting"); + + // Make sure we don't recursively validate options for the matrixmatrix kernels + ParameterList norecurse; + norecurse.disableRecursiveValidation(); + validParamList->set("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); + + return validParamList; +} + +/*********************************************************************************************************/ +template +void RAPShiftFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + + bool use_mdiag = false; + if (pL.isParameter("rap: shift diagonal M")) + use_mdiag = pL.get("rap: shift diagonal M"); + + // The low storage version requires mdiag + bool use_low_storage = false; + if (pL.isParameter("rap: shift low storage")) { + use_low_storage = pL.get("rap: shift low storage"); + use_mdiag = use_low_storage ? 
true : use_mdiag; + } + + if (implicitTranspose_ == false) { + Input(coarseLevel, "R"); + } + + if (!use_low_storage) + Input(fineLevel, "K"); + else + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + if (!use_mdiag) + Input(fineLevel, "M"); + else + Input(fineLevel, "Mdiag"); + + // CFL array stuff + if (pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { + if (fineLevel.GetLevelID() == 0) { + if (fineLevel.IsAvailable("deltaT", NoFactory::get())) { + fineLevel.DeclareInput("deltaT", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine deltaT", NoFactory::get()), + Exceptions::RuntimeError, + "deltaT was not provided by the user on level0!"); + } + + if (fineLevel.IsAvailable("cfl", NoFactory::get())) { + fineLevel.DeclareInput("cfl", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine cfl", NoFactory::get()), + Exceptions::RuntimeError, + "cfl was not provided by the user on level0!"); + } + } else { + Input(fineLevel, "cfl-based shift array"); + } + } + + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + (*it)->CallDeclareInput(coarseLevel); } +} - /*********************************************************************************************************/ - template - void RAPShiftFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); +template +void RAPShiftFactory::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const + { + FactoryMonitor m(*this, "Computing Ac", coarseLevel); + const Teuchos::ParameterList &pL = GetParameterList(); - bool use_mdiag = false; - if(pL.isParameter("rap: shift diagonal M")) - use_mdiag = pL.get("rap: shift diagonal M"); + bool M_is_diagonal = false; + if (pL.isParameter("rap: shift diagonal M")) + M_is_diagonal = 
pL.get("rap: shift diagonal M"); // The low storage version requires mdiag bool use_low_storage = false; - if(pL.isParameter("rap: shift low storage")) { + if (pL.isParameter("rap: shift low storage")) { use_low_storage = pL.get("rap: shift low storage"); - use_mdiag = use_low_storage ? true : use_mdiag; + M_is_diagonal = use_low_storage ? true : M_is_diagonal; } - if (implicitTranspose_ == false) { - Input(coarseLevel, "R"); + Teuchos::ArrayView doubleShifts; + Teuchos::ArrayRCP myshifts; + if (pL.isParameter("rap: shift array") && pL.get >("rap: shift array").size() > 0) { + // Do we have an array of shifts? If so, we set doubleShifts_ + doubleShifts = pL.get >("rap: shift array")(); } - - if(!use_low_storage) Input(fineLevel, "K"); - else Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - if(!use_mdiag) Input(fineLevel, "M"); - else Input(fineLevel, "Mdiag"); - - // CFL array stuff - if(pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { - if(fineLevel.GetLevelID() == 0) { - if(fineLevel.IsAvailable("deltaT", NoFactory::get())) { - fineLevel.DeclareInput("deltaT", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine deltaT", NoFactory::get()), - Exceptions::RuntimeError, - "deltaT was not provided by the user on level0!"); + if (pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { + // Do we have an array of CFLs? If so, we calculated the shifts from them. + Teuchos::ArrayView CFLs = pL.get >("rap: cfl array")(); + if (fineLevel.GetLevelID() == 0) { + double dt = Get(fineLevel, "deltaT"); + double cfl = Get(fineLevel, "cfl"); + double ts_at_cfl1 = dt / cfl; + myshifts.resize(CFLs.size()); + Teuchos::Array myCFLs(CFLs.size()); + myCFLs[0] = cfl; + + // Never make the CFL bigger + for (int i = 1; i < (int)CFLs.size(); i++) + myCFLs[i] = (CFLs[i] > cfl) ? 
cfl : CFLs[i]; + + { + std::ostringstream ofs; + ofs << "RAPShiftFactory: CFL schedule = "; + for (int i = 0; i < (int)CFLs.size(); i++) + ofs << " " << myCFLs[i]; + GetOStream(Statistics0) << ofs.str() << std::endl; } - - if(fineLevel.IsAvailable("cfl", NoFactory::get())) { - fineLevel.DeclareInput("cfl", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine cfl", NoFactory::get()), - Exceptions::RuntimeError, - "cfl was not provided by the user on level0!"); - } - } - else { - Input(fineLevel,"cfl-based shift array"); + GetOStream(Statistics0) << "RAPShiftFactory: Timestep at CFL=1 is " << ts_at_cfl1 << " " << std::endl; + + // The shift array needs to be 1/dt + for (int i = 0; i < (int)myshifts.size(); i++) + myshifts[i] = 1.0 / (ts_at_cfl1 * myCFLs[i]); + doubleShifts = myshifts(); + + { + std::ostringstream ofs; + ofs << "RAPShiftFactory: shift schedule = "; + for (int i = 0; i < (int)doubleShifts.size(); i++) + ofs << " " << doubleShifts[i]; + GetOStream(Statistics0) << ofs.str() << std::endl; + } + Set(coarseLevel, "cfl-based shift array", myshifts); + } else { + myshifts = Get >(fineLevel, "cfl-based shift array"); + doubleShifts = myshifts(); + Set(coarseLevel, "cfl-based shift array", myshifts); + // NOTE: If we're not on level zero, then we should have a shift array } } - // call DeclareInput of all user-given transfer factories - for(std::vector >::const_iterator it = transferFacts_.begin(); it!=transferFacts_.end(); ++it) { - (*it)->CallDeclareInput(coarseLevel); - } - } + // Inputs: K, M, P + // Note: In the low-storage case we do not keep a separate "K", we just use A + RCP K; + RCP M; + RCP Mdiag; + + if (use_low_storage) + K = Get >(fineLevel, "A"); + else + K = Get >(fineLevel, "K"); + if (!M_is_diagonal) + M = Get >(fineLevel, "M"); + else + Mdiag = Get >(fineLevel, "Mdiag"); + + RCP P = Get >(coarseLevel, "P"); + + // Build Kc = RKP, Mc = RMP + RCP KP, MP; + + // Reuse pattern if available (multiple solve) 
+ // FIXME: Old style reuse doesn't work any more + // if (IsAvailable(coarseLevel, "AP Pattern")) { + // KP = Get< RCP >(coarseLevel, "AP Pattern"); + // MP = Get< RCP >(coarseLevel, "AP Pattern"); + // } - template - void RAPShiftFactory::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const { - FactoryMonitor m(*this, "Computing Ac", coarseLevel); - const Teuchos::ParameterList& pL = GetParameterList(); - - bool M_is_diagonal = false; - if(pL.isParameter("rap: shift diagonal M")) - M_is_diagonal = pL.get("rap: shift diagonal M"); - - // The low storage version requires mdiag - bool use_low_storage = false; - if(pL.isParameter("rap: shift low storage")) { - use_low_storage = pL.get("rap: shift low storage"); - M_is_diagonal = use_low_storage ? true : M_is_diagonal; + SubFactoryMonitor subM(*this, "MxM: K x P", coarseLevel); + KP = Xpetra::MatrixMatrix::Multiply(*K, false, *P, false, KP, GetOStream(Statistics2)); + if (!M_is_diagonal) { + MP = Xpetra::MatrixMatrix::Multiply(*M, false, *P, false, MP, GetOStream(Statistics2)); + } else { + MP = Xpetra::MatrixFactory2::BuildCopy(P); + MP->leftScale(*Mdiag); } - Teuchos::ArrayView doubleShifts; - Teuchos::ArrayRCP myshifts; - if(pL.isParameter("rap: shift array") && pL.get >("rap: shift array").size() > 0 ) { - // Do we have an array of shifts? If so, we set doubleShifts_ - doubleShifts = pL.get >("rap: shift array")(); - } - if(pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { - // Do we have an array of CFLs? If so, we calculated the shifts from them. - Teuchos::ArrayView CFLs = pL.get >("rap: cfl array")(); - if(fineLevel.GetLevelID() == 0) { - double dt = Get(fineLevel,"deltaT"); - double cfl = Get(fineLevel,"cfl"); - double ts_at_cfl1 = dt / cfl; - myshifts.resize(CFLs.size()); - Teuchos::Array myCFLs(CFLs.size()); - myCFLs[0] = cfl; - - // Never make the CFL bigger - for(int i=1; i<(int)CFLs.size(); i++) - myCFLs[i] = (CFLs[i]> cfl) ? 
cfl : CFLs[i]; - - { - std::ostringstream ofs; - ofs<<"RAPShiftFactory: CFL schedule = "; - for(int i=0; i<(int)CFLs.size(); i++) - ofs<<" "< > (fineLevel,"cfl-based shift array"); - doubleShifts = myshifts(); - Set(coarseLevel,"cfl-based shift array",myshifts); - // NOTE: If we're not on level zero, then we should have a shift array - } - } - - // Inputs: K, M, P - // Note: In the low-storage case we do not keep a separate "K", we just use A - RCP K; - RCP M; - RCP Mdiag; - - if(use_low_storage) K = Get< RCP >(fineLevel, "A"); - else K = Get< RCP >(fineLevel, "K"); - if(!M_is_diagonal) M = Get< RCP >(fineLevel, "M"); - else Mdiag = Get< RCP >(fineLevel, "Mdiag"); - - RCP P = Get< RCP >(coarseLevel, "P"); - - // Build Kc = RKP, Mc = RMP - RCP KP, MP; - - // Reuse pattern if available (multiple solve) - // FIXME: Old style reuse doesn't work any more - // if (IsAvailable(coarseLevel, "AP Pattern")) { - // KP = Get< RCP >(coarseLevel, "AP Pattern"); - // MP = Get< RCP >(coarseLevel, "AP Pattern"); - // } - - { - SubFactoryMonitor subM(*this, "MxM: K x P", coarseLevel); - KP = Xpetra::MatrixMatrix::Multiply(*K, false, *P, false, KP, GetOStream(Statistics2)); - if(!M_is_diagonal) { - MP = Xpetra::MatrixMatrix::Multiply(*M, false, *P, false, MP, GetOStream(Statistics2)); - } - else { - MP = Xpetra::MatrixFactory2::BuildCopy(P); - MP->leftScale(*Mdiag); - } - - Set(coarseLevel, "AP Pattern", KP); - } + Set(coarseLevel, "AP Pattern", KP); + } - bool doOptimizedStorage = true; + bool doOptimizedStorage = true; - RCP Ac, Kc, Mc; + RCP Ac, Kc, Mc; - // Reuse pattern if available (multiple solve) - // if (IsAvailable(coarseLevel, "RAP Pattern")) - // Ac = Get< RCP >(coarseLevel, "RAP Pattern"); + // Reuse pattern if available (multiple solve) + // if (IsAvailable(coarseLevel, "RAP Pattern")) + // Ac = Get< RCP >(coarseLevel, "RAP Pattern"); - bool doFillComplete=true; - if (implicitTranspose_) { - SubFactoryMonitor m2(*this, "MxM: P' x (KP) (implicit)", coarseLevel); - Kc = 
Xpetra::MatrixMatrix::Multiply(*P, true, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - Mc = Xpetra::MatrixMatrix::Multiply(*P, true, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - } - else { - RCP R = Get< RCP >(coarseLevel, "R"); - SubFactoryMonitor m2(*this, "MxM: R x (KP) (explicit)", coarseLevel); - Kc = Xpetra::MatrixMatrix::Multiply(*R, false, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - Mc = Xpetra::MatrixMatrix::Multiply(*R, false, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - } + bool doFillComplete = true; + if (implicitTranspose_) { + SubFactoryMonitor m2(*this, "MxM: P' x (KP) (implicit)", coarseLevel); + Kc = Xpetra::MatrixMatrix::Multiply(*P, true, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + Mc = Xpetra::MatrixMatrix::Multiply(*P, true, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + } else { + RCP R = Get >(coarseLevel, "R"); + SubFactoryMonitor m2(*this, "MxM: R x (KP) (explicit)", coarseLevel); + Kc = Xpetra::MatrixMatrix::Multiply(*R, false, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + Mc = Xpetra::MatrixMatrix::Multiply(*R, false, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + } - // Get the shift - // FIXME - We should really get rid of the shifts array and drive this the same way everything else works - // If we're using the recursive "low storage" version, we need to shift by ( \prod_{i=1}^k shift[i] - \prod_{i=1}^{k-1} shift[i]) to - // get the recursive relationships correct - int level = coarseLevel.GetLevelID(); - Scalar shift = Teuchos::ScalarTraits::zero(); - if(!use_low_storage) { - // High Storage version - if(level < (int)shifts_.size()) shift = shifts_[level]; - else shift = Teuchos::as(pL.get("rap: shift")); - } - else { - // Low Storage Version - if(level < 
(int)shifts_.size()) { - if(level==1) shift = shifts_[level]; - else { - Scalar prod1 = Teuchos::ScalarTraits::one(); - for(int i=1; i < level-1; i++) { - prod1 *= shifts_[i]; - } - shift = (prod1 * shifts_[level] - prod1); - } - } - else if(doubleShifts.size() != 0) { - double d_shift = 0.0; - if(level < doubleShifts.size()) - d_shift = doubleShifts[level] - doubleShifts[level-1]; - - if(d_shift < 0.0) - GetOStream(Warnings1) << "WARNING: RAPShiftFactory has detected a negative shift... This implies a less stable coarse grid."<(d_shift); - } + // Get the shift + // FIXME - We should really get rid of the shifts array and drive this the same way everything else works + // If we're using the recursive "low storage" version, we need to shift by ( \prod_{i=1}^k shift[i] - \prod_{i=1}^{k-1} shift[i]) to + // get the recursive relationships correct + int level = coarseLevel.GetLevelID(); + Scalar shift = Teuchos::ScalarTraits::zero(); + if (!use_low_storage) { + // High Storage version + if (level < (int)shifts_.size()) + shift = shifts_[level]; + else + shift = Teuchos::as(pL.get("rap: shift")); + } else { + // Low Storage Version + if (level < (int)shifts_.size()) { + if (level == 1) + shift = shifts_[level]; else { - double base_shift = pL.get("rap: shift"); - if(level == 1) shift = Teuchos::as(base_shift); - else shift = Teuchos::as(pow(base_shift,level) - pow(base_shift,level-1)); + Scalar prod1 = Teuchos::ScalarTraits::one(); + for (int i = 1; i < level - 1; i++) { + prod1 *= shifts_[i]; + } + shift = (prod1 * shifts_[level] - prod1); } + } else if (doubleShifts.size() != 0) { + double d_shift = 0.0; + if (level < doubleShifts.size()) + d_shift = doubleShifts[level] - doubleShifts[level - 1]; + + if (d_shift < 0.0) + GetOStream(Warnings1) << "WARNING: RAPShiftFactory has detected a negative shift... This implies a less stable coarse grid." 
<< std::endl; + shift = Teuchos::as(d_shift); + } else { + double base_shift = pL.get("rap: shift"); + if (level == 1) + shift = Teuchos::as(base_shift); + else + shift = Teuchos::as(pow(base_shift, level) - pow(base_shift, level - 1)); } - GetOStream(Runtime0) << "RAPShiftFactory: Using shift " << shift << std::endl; - - - // recombine to get K+shift*M - { - SubFactoryMonitor m2(*this, "Add: RKP + s*RMP", coarseLevel); - Xpetra::MatrixMatrix::TwoMatrixAdd(*Kc, false, Teuchos::ScalarTraits::one(), *Mc, false, shift, Ac, GetOStream(Statistics2)); - Ac->fillComplete(); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1)); - - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - - Set(coarseLevel, "A", Ac); - // We only need K in the 'high storage' mode - if(!use_low_storage) - Set(coarseLevel, "K", Kc); - - if(!M_is_diagonal) { - Set(coarseLevel, "M", Mc); - } - else { - // If M is diagonal, then we only pass that part down the hierarchy - // NOTE: Should we be doing some kind of rowsum instead? 
- RCP Mcv = Xpetra::VectorFactory::Build(Mc->getRowMap(),false); - Mc->getLocalDiagCopy(*Mcv); - Set(coarseLevel, "Mdiag", Mcv); - } + } + GetOStream(Runtime0) << "RAPShiftFactory: Using shift " << shift << std::endl; - // Set(coarseLevel, "RAP Pattern", Ac); + // recombine to get K+shift*M + { + SubFactoryMonitor m2(*this, "Add: RKP + s*RMP", coarseLevel); + Xpetra::MatrixMatrix::TwoMatrixAdd(*Kc, false, Teuchos::ScalarTraits::one(), *Mc, false, shift, Ac, GetOStream(Statistics2)); + Ac->fillComplete(); } - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - GetOStream(Runtime0) << "RAPShiftFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); - // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid - // of dangling data for CoordinatesTransferFactory - coarseLevel.Release(*fac); - } + Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) + Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor, GetOStream(Statistics2)); + + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) + Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1)); + + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + + Set(coarseLevel, "A", Ac); + // We only need K in the 'high storage' mode + if (!use_low_storage) + Set(coarseLevel, "K", Kc); + + if (!M_is_diagonal) { + Set(coarseLevel, "M", Mc); 
+ } else { + // If M is diagonal, then we only pass that part down the hierarchy + // NOTE: Should we be doing some kind of rowsum instead? + RCP Mcv = Xpetra::VectorFactory::Build(Mc->getRowMap(), false); + Mc->getLocalDiagCopy(*Mcv); + Set(coarseLevel, "Mdiag", Mcv); } + + // Set(coarseLevel, "RAP Pattern", Ac); } - template - void RAPShiftFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "MueLu::RAPShiftFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. (Note: you can remove this exception if there's a good reason for)"); - transferFacts_.push_back(factory); + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m(*this, "Projections", coarseLevel); + + // call Build of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "RAPShiftFactory: call transfer factory: " << fac->description() << std::endl; + fac->CallBuild(coarseLevel); + // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid + // of dangling data for CoordinatesTransferFactory + coarseLevel.Release(*fac); + } } +} + +template +void RAPShiftFactory::AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "MueLu::RAPShiftFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. 
(Note: you can remove this exception if there's a good reason for)"); + transferFacts_.push_back(factory); +} -} //namespace MueLu +} // namespace MueLu #define MUELU_RAPSHIFTFACTORY_SHORT -#endif // MUELU_RAPSHIFTFACTORY_DEF_HPP +#endif // MUELU_RAPSHIFTFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp index e365ab23c75c..2719f96fabda 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp @@ -60,87 +60,84 @@ #include #include - #include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" - namespace MueLu { - /*! - @class SchurComplementFactory class. - @brief Factory for building the Schur Complement for a 2x2 block matrix. +/*! + @class SchurComplementFactory class. + @brief Factory for building the Schur Complement for a 2x2 block matrix. - ## Context, assumptions, and use cases ## + ## Context, assumptions, and use cases ## - This factory is intended to be used for building the schur complement for respective smoothers. The calculation is - for now restricted to 2x2 block matrices, where the schur complement is calculated on block A_11. + This factory is intended to be used for building the schur complement for respective smoothers. The calculation is + for now restricted to 2x2 block matrices, where the schur complement is calculated on block A_11. - For a blocked matrix \A = [A_00 A_01; A_10 A_11] it computes the Schur complement S = A_11 - 1/\omega A_10 Ainv A_01, - where \omega is some scaling factor and \Ainv an approximation of A_00^{-1} (from InverseApproximationFactory). + For a blocked matrix \A = [A_00 A_01; A_10 A_11] it computes the Schur complement S = A_11 - 1/\omega A_10 Ainv A_01, + where \omega is some scaling factor and \Ainv an approximation of A_00^{-1} (from InverseApproximationFactory). 
- ## Input/output of this factory ## + ## Input/output of this factory ## - ### User parameters of InterfaceAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the matrix A - Ainv | Factory | null | | * | * | Generating factory of the approximate inverse of A (produced by the InverseApproximationFactory) - omega | double | 1.0 | | * | * | Scaling factor + ### User parameters of InterfaceAggregationFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the matrix A + Ainv | Factory | null | | * | * | Generating factory of the approximate inverse of A (produced by the InverseApproximationFactory) + omega | double | 1.0 | | * | * | Scaling factor - The * in the master.xml column denotes that the parameter is defined in the master.xml file. - The * in the validated column means that the parameter is declared in the list of valid input parameters (see GetValidParameters() ). - The * in the requested column states that the data is requested as input with all dependencies (see DeclareInput() ). + The * in the master.xml column denotes that the parameter is defined in the master.xml file. + The * in the validated column means that the parameter is declared in the list of valid input parameters (see GetValidParameters() ). + The * in the requested column states that the data is requested as input with all dependencies (see DeclareInput() ). 
- ### Variables provided by this factory ### + ### Variables provided by this factory ### - After SchurComplementFactory::Build the following data is available (if requested) + After SchurComplementFactory::Build the following data is available (if requested) - Parameter | generated by | description - ----------|--------------|------------ - | A | SchurComplementFactory | The schur complement of the given block matrix. - */ + Parameter | generated by | description + ----------|--------------|------------ + | A | SchurComplementFactory | The schur complement of the given block matrix. +*/ - template - class SchurComplementFactory : public SingleLevelFactoryBase { +template +class SchurComplementFactory : public SingleLevelFactoryBase { #undef MUELU_SCHURCOMPLEMENTFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" - - public: - //! @name Constructors/Destructors. - //@{ +#include "MueLu_UseShortNames.hpp" - //! Constructor. - SchurComplementFactory() = default; + public: + //! @name Constructors/Destructors. + //@{ - //! Input - //@{ + //! Constructor. + SchurComplementFactory() = default; - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - RCP GetValidParameterList() const; + void DeclareInput(Level& currentLevel) const; - //@} + RCP GetValidParameterList() const; - //@{ - //! @name Build methods. + //@} - //! Build an object with this factory. - void Build(Level& currentLevel) const; + //@{ + //! @name Build methods. - //@} + //! Build an object with this factory. + void Build(Level& currentLevel) const; + //@} - private: - //! Schur complement calculation method. - RCP ComputeSchurComplement(RCP& bA, RCP& Ainv) const; + private: + //! Schur complement calculation method. 
+ RCP ComputeSchurComplement(RCP& bA, RCP& Ainv) const; - }; // class SchurComplementFactory +}; // class SchurComplementFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_SCHURCOMPLEMENTFACTORY_SHORT #endif /* MUELU_SCHURCOMPLEMENTFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp index 66cbd74fa792..c4503eec0d17 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp @@ -62,152 +62,148 @@ namespace MueLu { - template - RCP SchurComplementFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - const SC one = Teuchos::ScalarTraits::one(); - - validParamList->set >("A" , NoFactory::getRCP(), "Generating factory of the matrix A used for building Schur complement (must be a 2x2 blocked operator)"); - validParamList->set >("Ainv" , Teuchos::null, "Generating factory of the inverse matrix used in the Schur complement"); - - validParamList->set ("omega", one, "Scaling parameter in S = A(1,1) - 1/omega A(1,0) Ainv A(0,1)"); - - return validParamList; - } - - template - void SchurComplementFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - // Get default or user-given inverse approximation factory - RCP AinvFact = GetFactory("Ainv"); - currentLevel.DeclareInput("Ainv", AinvFact.get(), this); - } - - template - void SchurComplementFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP A = Get >(currentLevel, "A"); - RCP bA = rcp_dynamic_cast(A); - - TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast, - "MueLu::SchurComplementFactory::Build: input matrix A is not of type BlockedCrsMatrix!"); - TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != 2 || bA->Cols() != 2, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: input matrix A is a " << 
bA->Rows() << "x" << bA->Cols() << " block matrix. We expect a 2x2 blocked operator."); - - // Calculate Schur Complement - RCP Ainv = currentLevel.Get >("Ainv", this->GetFactory("Ainv").get()); - RCP S = ComputeSchurComplement(bA, Ainv); - - GetOStream(Statistics1) << "S has " << S->getGlobalNumRows() << "x" << S->getGlobalNumCols() << " rows and columns." << std::endl; - - // NOTE: "A" generated by this factory is actually the Schur complement - // matrix, but it is required as all smoothers expect "A" - Set(currentLevel, "A", S); - } - - template - RCP> - SchurComplementFactory::ComputeSchurComplement(RCP& bA, RCP& Ainv) const { - - using STS = Teuchos::ScalarTraits; - const SC zero = STS::zero(), one = STS::one(); - - RCP A01 = bA->getMatrix(0,1); - RCP A10 = bA->getMatrix(1,0); - RCP A11 = bA->getMatrix(1,1); - - RCP bA01 = Teuchos::rcp_dynamic_cast(A01); - const bool isBlocked = (bA01 == Teuchos::null ? false : true); - - const ParameterList& pL = GetParameterList(); - const SC omega = pL.get("omega"); - - TEUCHOS_TEST_FOR_EXCEPTION(omega == zero, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Scaling parameter omega must not be zero to avoid division by zero."); - - RCP S = Teuchos::null; // Schur complement - RCP D = Teuchos::null; // temporary result for A10*Ainv*A01 - - // only if the off-diagonal blocks A10 and A01 are non-zero we have to do the MM multiplication - if(A01.is_null() == false && A10.is_null() == false) { - // scale with -1/omega - Ainv->scale(Teuchos::as(-one/omega)); - - // build Schur complement operator - if (!isBlocked) { - RCP myparams = rcp(new ParameterList); - myparams->set("compute global constants", true); - - // -1/omega*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(Ainv->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A01 and domain map of Ainv are not the same."); - RCP C = MatrixMatrix::Multiply(*Ainv, false, *A01, false, 
GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); - - // -1/omega*A10*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(A10->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A10 and domain map A01 are not the same."); - D = MatrixMatrix::Multiply(*A10, false, *C, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); - } - else { - // nested blocking - auto bA10 = Teuchos::rcp_dynamic_cast(A10); - auto bAinv = Teuchos::rcp_dynamic_cast(Ainv); - TEUCHOS_TEST_FOR_EXCEPTION(bAinv == Teuchos::null, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Casting Ainv to BlockedCrsMatrix not possible."); - - // -1/omega*bAinv*bA01 - TEUCHOS_TEST_FOR_EXCEPTION(bA01->Rows() != bAinv->Cols(), Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Block rows and cols of bA01 and bAinv are not compatible."); - RCP C = MatrixMatrix::TwoMatrixMultiplyBlock(*bAinv, false, *bA01, false, GetOStream(Statistics2)); - - // -1/omega*A10*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(bA10->Rows() != bA01->Cols(), Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Block rows and cols of bA10 and bA01 are not compatible."); - D = MatrixMatrix::TwoMatrixMultiplyBlock(*bA10, false, *C, false, GetOStream(Statistics2)); - } - if (!A11.is_null()) { - MatrixMatrix::TwoMatrixAdd(*A11, false, one, *D, false, one, S, GetOStream(Statistics2)); - S->fillComplete(); - - TEUCHOS_TEST_FOR_EXCEPTION(A11->getRangeMap()->isSameAs(*(S->getRangeMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A11 and S are not the same."); - TEUCHOS_TEST_FOR_EXCEPTION(A11->getDomainMap()->isSameAs(*(S->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: DomainMap of A11 and S are not the same."); - } - else { - S = MatrixFactory::BuildCopy(D); - } 
+template +RCP SchurComplementFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + const SC one = Teuchos::ScalarTraits::one(); + + validParamList->set>("A", NoFactory::getRCP(), "Generating factory of the matrix A used for building Schur complement (must be a 2x2 blocked operator)"); + validParamList->set>("Ainv", Teuchos::null, "Generating factory of the inverse matrix used in the Schur complement"); + + validParamList->set("omega", one, "Scaling parameter in S = A(1,1) - 1/omega A(1,0) Ainv A(0,1)"); + + return validParamList; +} + +template +void SchurComplementFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + + // Get default or user-given inverse approximation factory + RCP AinvFact = GetFactory("Ainv"); + currentLevel.DeclareInput("Ainv", AinvFact.get(), this); +} + +template +void SchurComplementFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP A = Get>(currentLevel, "A"); + RCP bA = rcp_dynamic_cast(A); + + TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast, + "MueLu::SchurComplementFactory::Build: input matrix A is not of type BlockedCrsMatrix!"); + TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != 2 || bA->Cols() != 2, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: input matrix A is a " << bA->Rows() << "x" << bA->Cols() << " block matrix. We expect a 2x2 blocked operator."); + + // Calculate Schur Complement + RCP Ainv = currentLevel.Get>("Ainv", this->GetFactory("Ainv").get()); + RCP S = ComputeSchurComplement(bA, Ainv); + + GetOStream(Statistics1) << "S has " << S->getGlobalNumRows() << "x" << S->getGlobalNumCols() << " rows and columns." 
<< std::endl; + + // NOTE: "A" generated by this factory is actually the Schur complement + // matrix, but it is required as all smoothers expect "A" + Set(currentLevel, "A", S); +} + +template +RCP> +SchurComplementFactory::ComputeSchurComplement(RCP& bA, RCP& Ainv) const { + using STS = Teuchos::ScalarTraits; + const SC zero = STS::zero(), one = STS::one(); + + RCP A01 = bA->getMatrix(0, 1); + RCP A10 = bA->getMatrix(1, 0); + RCP A11 = bA->getMatrix(1, 1); + + RCP bA01 = Teuchos::rcp_dynamic_cast(A01); + const bool isBlocked = (bA01 == Teuchos::null ? false : true); + + const ParameterList& pL = GetParameterList(); + const SC omega = pL.get("omega"); + + TEUCHOS_TEST_FOR_EXCEPTION(omega == zero, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Scaling parameter omega must not be zero to avoid division by zero."); + + RCP S = Teuchos::null; // Schur complement + RCP D = Teuchos::null; // temporary result for A10*Ainv*A01 + + // only if the off-diagonal blocks A10 and A01 are non-zero we have to do the MM multiplication + if (A01.is_null() == false && A10.is_null() == false) { + // scale with -1/omega + Ainv->scale(Teuchos::as(-one / omega)); + + // build Schur complement operator + if (!isBlocked) { + RCP myparams = rcp(new ParameterList); + myparams->set("compute global constants", true); + + // -1/omega*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(Ainv->getDomainMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A01 and domain map of Ainv are not the same."); + RCP C = MatrixMatrix::Multiply(*Ainv, false, *A01, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); + + // -1/omega*A10*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(A10->getDomainMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A10 and domain map A01 are not the same."); + D = 
MatrixMatrix::Multiply(*A10, false, *C, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); + } else { + // nested blocking + auto bA10 = Teuchos::rcp_dynamic_cast(A10); + auto bAinv = Teuchos::rcp_dynamic_cast(Ainv); + TEUCHOS_TEST_FOR_EXCEPTION(bAinv == Teuchos::null, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Casting Ainv to BlockedCrsMatrix not possible."); + + // -1/omega*bAinv*bA01 + TEUCHOS_TEST_FOR_EXCEPTION(bA01->Rows() != bAinv->Cols(), Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Block rows and cols of bA01 and bAinv are not compatible."); + RCP C = MatrixMatrix::TwoMatrixMultiplyBlock(*bAinv, false, *bA01, false, GetOStream(Statistics2)); + + // -1/omega*A10*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION(bA10->Rows() != bA01->Cols(), Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Block rows and cols of bA10 and bA01 are not compatible."); + D = MatrixMatrix::TwoMatrixMultiplyBlock(*bA10, false, *C, false, GetOStream(Statistics2)); } - else { - if (!A11.is_null()) { - S = MatrixFactory::BuildCopy(A11); - } else { - S = MatrixFactory::Build(A11->getRowMap(), 10 /*A11->getLocalMaxNumRowEntries()*/); - S->fillComplete(A11->getDomainMap(),A11->getRangeMap()); - } + if (!A11.is_null()) { + MatrixMatrix::TwoMatrixAdd(*A11, false, one, *D, false, one, S, GetOStream(Statistics2)); + S->fillComplete(); + + TEUCHOS_TEST_FOR_EXCEPTION(A11->getRangeMap()->isSameAs(*(S->getRangeMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A11 and S are not the same."); + TEUCHOS_TEST_FOR_EXCEPTION(A11->getDomainMap()->isSameAs(*(S->getDomainMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: DomainMap of A11 and S are not the same."); + } else { + S = MatrixFactory::BuildCopy(D); } - - // Check whether Schur complement operator is a 1x1 block matrix. 
- // If so, unwrap it and return the CrsMatrix based Matrix object - // We need this, as single-block smoothers expect it this way. - // In case of Thyra GIDs we obtain a Schur complement operator in Thyra GIDs - // This may make some special handling in feeding the SchurComplement solver Apply routine - // necessary! - if (isBlocked) { - RCP bS = Teuchos::rcp_dynamic_cast(S); - - if (bS != Teuchos::null && bS->Rows() == 1 && bS->Cols() == 1) { - RCP temp = bS->getCrsMatrix(); - S.swap(temp); - } + } else { + if (!A11.is_null()) { + S = MatrixFactory::BuildCopy(A11); + } else { + S = MatrixFactory::Build(A11->getRowMap(), 10 /*A11->getLocalMaxNumRowEntries()*/); + S->fillComplete(A11->getDomainMap(), A11->getRangeMap()); } + } - return S; + // Check whether Schur complement operator is a 1x1 block matrix. + // If so, unwrap it and return the CrsMatrix based Matrix object + // We need this, as single-block smoothers expect it this way. + // In case of Thyra GIDs we obtain a Schur complement operator in Thyra GIDs + // This may make some special handling in feeding the SchurComplement solver Apply routine + // necessary! + if (isBlocked) { + RCP bS = Teuchos::rcp_dynamic_cast(S); + + if (bS != Teuchos::null && bS->Rows() == 1 && bS->Cols() == 1) { + RCP temp = bS->getCrsMatrix(); + S.swap(temp); + } } -} // namespace MueLu + return S; +} + +} // namespace MueLu #endif /* MUELU_SCHURCOMPLEMENTFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp index 698be7f781aa..18f87c51dd0c 100644 --- a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp @@ -55,62 +55,60 @@ namespace MueLu { - /*! - @class SegregatedAFactory class. - @brief Factory for building a new "segregated" A operator. 
Here, "segregated" means that the user - provides a map (containing a subset of the row gids of the input matrix A) and the factory - drops the off-diagonal entries (a,b) and (b,a) in A where "a" denotes a GID entry in the provided map - and "b" denotes a GID that is not contained in the provided map. - - The idea is to use the output matrix A as input for the aggregation factory to have control over - the aggregates and make sure that aggregates do not cross certain areas. - - Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory - does not distinguish between matrix entries which are zero and nonzero. - */ - - template - class SegregatedAFactory : public SingleLevelFactoryBase { +/*! + @class SegregatedAFactory class. + @brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user + provides a map (containing a subset of the row gids of the input matrix A) and the factory + drops the off-diagonal entries (a,b) and (b,a) in A where "a" denotes a GID entry in the provided map + and "b" denotes a GID that is not contained in the provided map. + + The idea is to use the output matrix A as input for the aggregation factory to have control over + the aggregates and make sure that aggregates do not cross certain areas. + + Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory + does not distinguish between matrix entries which are zero and nonzero. +*/ + +template +class SegregatedAFactory : public SingleLevelFactoryBase { #undef MUELU_SEGREGATEDAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Constructor. - SegregatedAFactory() = default; + public: + //! Constructor. + SegregatedAFactory() = default; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! 
@name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - - Builds filtered matrix and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + /*! + @brief Build method. - //@} + Builds filtered matrix and returns it in currentLevel. + */ + void Build(Level& currentLevel) const; - private: + //@} - //! Generating factory of input variable - mutable RCP mapFact_; + private: + //! Generating factory of input variable + mutable RCP mapFact_; - }; //class SegregatedAFactory +}; // class SegregatedAFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_SEGREGATEDAFACTORY_SHORT -#endif // MUELU_SEGREGATEDAFACTORY_DECL_HPP +#endif // MUELU_SEGREGATEDAFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp index c2993c84253b..27d2daee78a3 100644 --- a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp @@ -57,112 +57,109 @@ namespace MueLu { - template - RCP SegregatedAFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP SegregatedAFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) -#undef SET_VALID_ENTRY +#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - validParamList->set< std::string > ("map: name", "", "Name of map (Xpetra::Map) provided by user containing the special DOFs."); - validParamList->set< std::string > ("map: factory", "", "Name of generating factory for 'map: name'"); + validParamList->set("map: name", "", "Name of map (Xpetra::Map) provided by user containing the special DOFs."); 
+ validParamList->set("map: factory", "", "Name of generating factory for 'map: name'"); - return validParamList; - } + return validParamList; +} - template - void SegregatedAFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); +template +void SegregatedAFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); - const ParameterList& pL = GetParameterList(); - std::string mapName = pL.get ("map: name"); - std::string mapFactName = pL.get ("map: factory"); + const ParameterList& pL = GetParameterList(); + std::string mapName = pL.get("map: name"); + std::string mapFactName = pL.get("map: factory"); - if (currentLevel.GetLevelID() == 0) - { - // Not needed, if the map is provided as user data - currentLevel.DeclareInput(mapName, NoFactory::get(), this); - } - else - { - // check whether user has provided a specific name for the MapFactory - if (mapFactName == "" || mapFactName == "NoFactory") - mapFact_ = MueLu::NoFactory::getRCP(); - else if (mapFactName != "null") - mapFact_ = currentLevel.GetFactoryManager()->GetFactory(mapFactName); - - // request map generated by mapFact_ - currentLevel.DeclareInput(mapName, mapFact_.get(), this); - } + if (currentLevel.GetLevelID() == 0) { + // Not needed, if the map is provided as user data + currentLevel.DeclareInput(mapName, NoFactory::get(), this); + } else { + // check whether user has provided a specific name for the MapFactory + if (mapFactName == "" || mapFactName == "NoFactory") + mapFact_ = MueLu::NoFactory::getRCP(); + else if (mapFactName != "null") + mapFact_ = currentLevel.GetFactoryManager()->GetFactory(mapFactName); + + // request map generated by mapFact_ + currentLevel.DeclareInput(mapName, mapFact_.get(), this); + } +} + +template +void SegregatedAFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Matrix filtering (segregation)", currentLevel); + + RCP Ain = Get>(currentLevel, "A"); + + const ParameterList& pL = GetParameterList(); + 
std::string mapName = pL.get("map: name"); + std::string mapFact = pL.get("map: factory"); + + // fetch map from level + RCP map = Teuchos::null; + if (currentLevel.GetLevelID() == 0) { + map = currentLevel.Get>(mapName, NoFactory::get()); + GetOStream(Statistics0) << "User provided map \"" << mapName << "\": length dimension=" << map->getGlobalNumElements() << std::endl; + } else { + if (currentLevel.IsAvailable(mapName, mapFact_.get()) == false) + GetOStream(Runtime0) << "User provided map \"" << mapName << "\" not found in Level class on level " << currentLevel.GetLevelID() << "." << std::endl; + map = currentLevel.Get>(mapName, mapFact_.get()); } - template - void SegregatedAFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering (segregation)", currentLevel); - - RCP Ain = Get< RCP >(currentLevel, "A"); - - const ParameterList& pL = GetParameterList(); - std::string mapName = pL.get ("map: name"); - std::string mapFact = pL.get ("map: factory"); - - // fetch map from level - RCP map = Teuchos::null; - if (currentLevel.GetLevelID() == 0) { - map = currentLevel.Get>(mapName, NoFactory::get()); - GetOStream(Statistics0) << "User provided map \"" << mapName << "\": length dimension=" << map->getGlobalNumElements() << std::endl; - } else { - if (currentLevel.IsAvailable(mapName, mapFact_.get()) == false) - GetOStream(Runtime0) << "User provided map \"" << mapName << "\" not found in Level class on level " << currentLevel.GetLevelID() << "." 
<< std::endl; - map = currentLevel.Get>(mapName, mapFact_.get()); - } + // create new empty Operator + Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - // create new empty Operator - Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - - size_t numLocalRows = Ain->getLocalNumRows(); - for(size_t row=0; rowgetRowMap()->getGlobalElement(row); // global row id - bool isInMap = map->isNodeGlobalElement(grid); - - // extract row information from input matrix - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ain->getLocalRowView(row, indices, vals); - - // just copy all values in output - Teuchos::ArrayRCP indout(indices.size(),Teuchos::ScalarTraits::zero()); - Teuchos::ArrayRCP valout(indices.size(),Teuchos::ScalarTraits::zero()); - - size_t nNonzeros = 0; - for(size_t i=0; i<(size_t)indices.size(); i++) { // or can be parallelize this loop? - GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id - bool isInMap2 = map->isNodeGlobalElement(gcid); - - if (isInMap == isInMap2) { - indout [nNonzeros] = gcid; - valout [nNonzeros] = vals[i]; - nNonzeros++; - } - } - indout.resize(nNonzeros); - valout.resize(nNonzeros); + size_t numLocalRows = Ain->getLocalNumRows(); + for (size_t row = 0; row < numLocalRows; row++) { // how can i replace this by a parallel for? 
+ GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row); // global row id + bool isInMap = map->isNodeGlobalElement(grid); - Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); - } + // extract row information from input matrix + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ain->getLocalRowView(row, indices, vals); - Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + // just copy all values in output + Teuchos::ArrayRCP indout(indices.size(), Teuchos::ScalarTraits::zero()); + Teuchos::ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - // copy block size information - Aout->SetFixedBlockSize(Ain->GetFixedBlockSize()); + size_t nNonzeros = 0; + for (size_t i = 0; i < (size_t)indices.size(); i++) { // or can be parallelize this loop? + GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id + bool isInMap2 = map->isNodeGlobalElement(gcid); - GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + if (isInMap == isInMap2) { + indout[nNonzeros] = gcid; + valout[nNonzeros] = vals[i]; + nNonzeros++; + } + } + indout.resize(nNonzeros); + valout.resize(nNonzeros); - Set(currentLevel, "A", Aout); + Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size())); } -} //namespace MueLu + Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + + // copy block size information + Aout->SetFixedBlockSize(Ain->GetFixedBlockSize()); + + GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + + Set(currentLevel, "A", Aout); +} + +} // namespace MueLu -#endif // MUELU_SEGREGATEDAFACTORY_DEF_HPP +#endif // MUELU_SEGREGATEDAFACTORY_DEF_HPP 
diff --git a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp index 86491d0ad5af..cf177680a5bd 100644 --- a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp @@ -54,57 +54,55 @@ namespace MueLu { - /*! - @class StructuredLineDetectionFactory class. - @brief Factory building line detection information on structured meshes - */ - - template - class StructuredLineDetectionFactory : public SingleLevelFactoryBase { +/*! + @class StructuredLineDetectionFactory class. + @brief Factory building line detection information on structured meshes +*/ + +template +class StructuredLineDetectionFactory : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - StructuredLineDetectionFactory() { } + StructuredLineDetectionFactory() {} - //! Destructor. - virtual ~StructuredLineDetectionFactory() { } + //! Destructor. + virtual ~StructuredLineDetectionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - - Builds line detection information and stores it in currentLevel - */ - void Build(Level& currentLevel) const; + /*! + @brief Build method. 
- //@} + Builds line detection information and stores it in currentLevel + */ + void Build(Level& currentLevel) const; - private: + //@} - }; //class StructuredLineDetectionFactory + private: +}; // class StructuredLineDetectionFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT -#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DECL_HPP +#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp index 86c2e0493498..54c6dc7a5116 100644 --- a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp @@ -54,76 +54,75 @@ namespace MueLu { - template - RCP StructuredLineDetectionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< std::string > ("orientation", "Z", "Lines orientation"); - validParamList->set< RCP >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - - return validParamList; - } - - template - void StructuredLineDetectionFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - // Request the global number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - } +template +RCP StructuredLineDetectionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set >("A", 
Teuchos::null, "Generating factory of the matrix A"); + validParamList->set("orientation", "Z", "Lines orientation"); + validParamList->set >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimension provided by CoordinatesTransferFactory."); + + return validParamList; +} + +template +void StructuredLineDetectionFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + // Request the global number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - Input(currentLevel, "lNodesPerDim"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } + } else { + Input(currentLevel, "lNodesPerDim"); + } +} + +template +void StructuredLineDetectionFactory::Build(Level& currentLevel) const { + // The following three variables are needed by the line smoothers in Ifpack/Ifpack2 + LO NumZDir = 0; + Teuchos::ArrayRCP VertLineId = Teuchos::arcp(0); + + // collect information provided by user + const ParameterList& pL = GetParameterList(); + const std::string lineOrientation = pL.get("orientation"); + + // Extract data from currentLevel + RCP A = Get >(currentLevel, "A"); + Array lNodesPerDir = Get >(currentLevel, "lNodesPerDim"); + LO numNodes = lNodesPerDir[0] * lNodesPerDir[1] * lNodesPerDir[2]; + VertLineId.resize(numNodes); + if (lineOrientation == "X") { + NumZDir = lNodesPerDir[0]; + } else if (lineOrientation == "Y") { + NumZDir = lNodesPerDir[1]; + } else if (lineOrientation == "Z") { + NumZDir = lNodesPerDir[2]; } - template - void StructuredLineDetectionFactory::Build(Level& currentLevel) const { - - // The following three variables are needed by the line smoothers in Ifpack/Ifpack2 - LO NumZDir = 0; - Teuchos::ArrayRCP VertLineId = Teuchos::arcp(0); - - // 
collect information provided by user - const ParameterList& pL = GetParameterList(); - const std::string lineOrientation = pL.get("orientation"); - - // Extract data from currentLevel - RCP A = Get< RCP >(currentLevel, "A"); - Array lNodesPerDir = Get > (currentLevel, "lNodesPerDim"); - LO numNodes = lNodesPerDir[0]*lNodesPerDir[1]*lNodesPerDir[2]; - VertLineId.resize(numNodes); - if(lineOrientation == "X") { - NumZDir = lNodesPerDir[0]; - } else if(lineOrientation == "Y") { - NumZDir = lNodesPerDir[1]; - } else if(lineOrientation == "Z") { - NumZDir = lNodesPerDir[2]; - } - - for(LO k = 0; k < lNodesPerDir[2]; ++k) { - for(LO j = 0; j < lNodesPerDir[1]; ++j) { - for(LO i = 0; i < lNodesPerDir[0]; ++i) { - if(lineOrientation == "X") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = k*lNodesPerDir[1] + j; - } else if(lineOrientation == "Y") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = k*lNodesPerDir[0] + i; - } else if(lineOrientation == "Z") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = j*lNodesPerDir[0] + i; - } + for (LO k = 0; k < lNodesPerDir[2]; ++k) { + for (LO j = 0; j < lNodesPerDir[1]; ++j) { + for (LO i = 0; i < lNodesPerDir[0]; ++i) { + if (lineOrientation == "X") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + j * lNodesPerDir[0] + i] = k * lNodesPerDir[1] + j; + } else if (lineOrientation == "Y") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + j * lNodesPerDir[0] + i] = k * lNodesPerDir[0] + i; + } else if (lineOrientation == "Z") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + j * lNodesPerDir[0] + i] = j * lNodesPerDir[0] + i; } } } - - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_VertLineIds", VertLineId); } -} //namespace MueLu + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_VertLineIds", VertLineId); +} + +} // namespace MueLu -#endif // 
MUELU_STRUCTUREDLINEDETECTIONFACTORY_DEF_HPP +#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp index 5f798befa2e8..5f57aac97893 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp @@ -59,52 +59,51 @@ namespace MueLu { - /*! - @class ThresholdAFilterFactory class. - @brief Factory for building a thresholded operator. +/*! + @class ThresholdAFilterFactory class. + @brief Factory for building a thresholded operator. - */ +*/ - template - class ThresholdAFilterFactory : public SingleLevelFactoryBase { +template +class ThresholdAFilterFactory : public SingleLevelFactoryBase { #undef MUELU_THRESHOLDAFILTERFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1); + //! Constructor. + ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal = true, const GlobalOrdinal expectedNNZperRow = -1); - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! Build an object with this factory. 
+ void Build(Level& currentLevel) const; - //@} + //@} - private: - std::string varName_; ///< name of input and output variable - const Scalar threshold_; ///< threshold parameter - const bool keepDiagonal_; - const GlobalOrdinal expectedNNZperRow_; + private: + std::string varName_; ///< name of input and output variable + const Scalar threshold_; ///< threshold parameter + const bool keepDiagonal_; + const GlobalOrdinal expectedNNZperRow_; +}; // class ThresholdAFilterFactory - }; // class ThresholdAFilterFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_THRESHOLDAFILTERFACTORY_SHORT -#endif // MUELU_THRESHOLDAFILTERFACTORY_DECL_HPP +#endif // MUELU_THRESHOLDAFILTERFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp index 10da9befa53d..ad8621640bb4 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp @@ -56,30 +56,31 @@ namespace MueLu { - template - ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) - : varName_(ename), threshold_(threshold), keepDiagonal_(keepDiagonal), expectedNNZperRow_(expectedNNZperRow) - { } +template +ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) + : varName_(ename) + , threshold_(threshold) + , keepDiagonal_(keepDiagonal) + , expectedNNZperRow_(expectedNNZperRow) {} - template - void ThresholdAFilterFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, varName_); - } +template +void ThresholdAFilterFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, varName_); +} - template - void ThresholdAFilterFactory:: - Build (Level & currentLevel) const - { - FactoryMonitor m 
(*this, "A filter (thresholding)", currentLevel); +template +void ThresholdAFilterFactory:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "A filter (thresholding)", currentLevel); - RCP Ain = Get< RCP >(currentLevel, varName_); - RCP Aout = - MueLu::Utilities::GetThresholdedMatrix(Ain, threshold_, keepDiagonal_, expectedNNZperRow_); + RCP Ain = Get >(currentLevel, varName_); + RCP Aout = + MueLu::Utilities::GetThresholdedMatrix(Ain, threshold_, keepDiagonal_, expectedNNZperRow_); - GetOStream(Statistics0) << "Nonzeros in " << varName_ << "(input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering " << varName_ << " (parameter: " << threshold_ << "): " << Aout->getGlobalNumEntries() << std::endl; - currentLevel.Set(varName_, Teuchos::rcp_dynamic_cast(Aout), this); - } + GetOStream(Statistics0) << "Nonzeros in " << varName_ << "(input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering " << varName_ << " (parameter: " << threshold_ << "): " << Aout->getGlobalNumEntries() << std::endl; + currentLevel.Set(varName_, Teuchos::rcp_dynamic_cast(Aout), this); +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_THRESHOLDAFILTERFACTORY_DEF_HPP +#endif // MUELU_THRESHOLDAFILTERFACTORY_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp b/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp index d291d05c4a9f..4b3f61292bca 100644 --- a/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp +++ b/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp @@ -52,44 +52,42 @@ namespace MueLu { - /*! - @class BaseClass class. - @brief Base class for MueLu classes +/*! + @class BaseClass class. + @brief Base class for MueLu classes - @ingroup MueLuBaseClasses - */ - class BaseClass - : public VerboseObject, public Describable - { + @ingroup MueLuBaseClasses +*/ +class BaseClass + : public VerboseObject, + public Describable { + public: + //! @name Constructors/Destructors + //@{ - public: + //! Destructor. 
+ virtual ~BaseClass() {} - //! @name Constructors/Destructors - //@{ + //@} - //! Destructor. - virtual ~BaseClass() {} +}; // class BaseClass - //@} - - }; // class BaseClass - -} // namespace MueLu +} // namespace MueLu //! Helper macro for implementing Describable::describe() for BaseClass objects. // This macro defines ostream out0 that print only on root node. It print description() and indent the ostream. // Note: Runtime1 displays basic parameter information when Parameters0 is not enabled. -#define MUELU_DESCRIBE \ - using std::endl; \ +#define MUELU_DESCRIBE \ + using std::endl; \ Teuchos::FancyOStream& out0 = (VerboseObject::GetProcRankVerbose() == 0) ? out : VerboseObject::GetBlackHole(); \ - \ - if ((verbLevel & Runtime1) && (!(verbLevel & Parameters0))) \ - out << description() << std::endl; \ - else if (verbLevel & Runtime0) \ - out << BaseClass::description() << std::endl; \ - \ - Teuchos::OSTab tab1(out); \ + \ + if ((verbLevel & Runtime1) && (!(verbLevel & Parameters0))) \ + out << description() << std::endl; \ + else if (verbLevel & Runtime0) \ + out << BaseClass::description() << std::endl; \ + \ + Teuchos::OSTab tab1(out); \ // #define MUELU_BASECLASS_SHORT -#endif // ifndef MUELU_BASECLASS_HPP +#endif // ifndef MUELU_BASECLASS_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Describable.cpp b/packages/muelu/src/MueCentral/MueLu_Describable.cpp index c2bba36bc122..88d04d8023b3 100644 --- a/packages/muelu/src/MueCentral/MueLu_Describable.cpp +++ b/packages/muelu/src/MueCentral/MueLu_Describable.cpp @@ -50,51 +50,50 @@ namespace MueLu { - Describable::~Describable() { } +Describable::~Describable() {} - void Describable::describe(Teuchos::FancyOStream &out_arg, const VerbLevel /* verbLevel */) const { - Teuchos::RCP out = rcp(&out_arg,false); //JG: no idea why we have to do that, but it's how Teuchos::Describable::describe() is implemented - Teuchos::OSTab tab(out); - *out << this->description() << std::endl; - } +void 
Describable::describe(Teuchos::FancyOStream &out_arg, const VerbLevel /* verbLevel */) const { + Teuchos::RCP out = rcp(&out_arg, false); // JG: no idea why we have to do that, but it's how Teuchos::Describable::describe() is implemented + Teuchos::OSTab tab(out); + *out << this->description() << std::endl; +} - std::string Describable::description() const { - std::string str = Teuchos::Describable::description(); +std::string Describable::description() const { + std::string str = Teuchos::Describable::description(); - // remove template parameters - size_t found = str.find_first_of("<"); - if (found != std::string::npos) - return str.substr(0, found); + // remove template parameters + size_t found = str.find_first_of("<"); + if (found != std::string::npos) + return str.substr(0, found); - return str; - } + return str; +} - void Describable::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, toMueLuVerbLevel(verbLevel)); } +void Describable::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, toMueLuVerbLevel(verbLevel)); } - std::string Describable::ShortClassName() const { - if ( shortClassName_.empty() ) - { - std::string str = Teuchos::Describable::description(); +std::string Describable::ShortClassName() const { + if (shortClassName_.empty()) { + std::string str = Teuchos::Describable::description(); - // remove template parameters - { - size_t found = str.find_first_of("<"); - if (found != std::string::npos) - str = str.substr(0, found); - } + // remove template parameters + { + size_t found = str.find_first_of("<"); + if (found != std::string::npos) + str = str.substr(0, found); + } - // remove namespace - { - size_t found = str.find_last_of(":"); - if (found != std::string::npos) - str = str.substr(found+1); - } - shortClassName_ = str; - } - return shortClassName_; + // remove namespace + { + size_t found = str.find_last_of(":"); + if (found != 
std::string::npos) + str = str.substr(found + 1); } + shortClassName_ = str; + } + return shortClassName_; +} -} // namespace MueLu +} // namespace MueLu #define MUELU_DESCRIBABLE_SHORT -#endif // MUELU_DESCRIBABLE_HPP +#endif // MUELU_DESCRIBABLE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Describable.hpp b/packages/muelu/src/MueCentral/MueLu_Describable.hpp index 7dbb4dc08811..e6341b8246cc 100644 --- a/packages/muelu/src/MueCentral/MueLu_Describable.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Describable.hpp @@ -46,55 +46,53 @@ #ifndef MUELU_DESCRIBABLE_DECL_HPP #define MUELU_DESCRIBABLE_DECL_HPP -#include // for string -#include "Teuchos_FancyOStream.hpp" // for FancyOStream -#include "Teuchos_VerbosityLevel.hpp" // for EVerbosityLevel +#include // for string +#include "Teuchos_FancyOStream.hpp" // for FancyOStream +#include "Teuchos_VerbosityLevel.hpp" // for EVerbosityLevel #include "Teuchos_Describable.hpp" #include "MueLu_VerbosityLevel.hpp" namespace MueLu { - /*! - @class Describable - @brief Base class for MueLu classes +/*! + @class Describable + @brief Base class for MueLu classes - @ingroup MueLuBaseClasses - */ - class Describable - : public Teuchos::Describable - { - mutable std::string shortClassName_ = ""; // cached so that we don't have to call demangleName() every time; mutable so that ShortClassName() can initialize lazily while remaining const + @ingroup MueLuBaseClasses +*/ +class Describable + : public Teuchos::Describable { + mutable std::string shortClassName_ = ""; // cached so that we don't have to call demangleName() every time; mutable so that ShortClassName() can initialize lazily while remaining const - public: + public: + //! Destructor. + virtual ~Describable(); - //! Destructor. - virtual ~Describable(); + //! @name MueLu Describe + //@{ - //! 
@name MueLu Describe - //@{ + virtual void describe(Teuchos::FancyOStream &out_arg, const VerbLevel verbLevel = Default) const; - virtual void describe(Teuchos::FancyOStream &out_arg, const VerbLevel verbLevel = Default) const; + //@} - //@} + //! @name Overridden from Teuchos::Describable + //@{ - //! @name Overridden from Teuchos::Describable - //@{ + //! Return a simple one-line description of this object. + virtual std::string description() const; - //! Return a simple one-line description of this object. - virtual std::string description() const; + //! Print the object with some verbosity level to an FancyOStream object. + void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::Describable::verbLevel_default) const; - //! Print the object with some verbosity level to an FancyOStream object. - void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::Describable::verbLevel_default) const; + //@} - //@} + //! Return the class name of the object, without template parameters and without namespace + virtual std::string ShortClassName() const; - //! 
Return the class name of the object, without template parameters and without namespace - virtual std::string ShortClassName() const; +}; // class Describable - }; // class Describable - -} // namespace MueLu +} // namespace MueLu #define MUELU_DESCRIBABLE_SHORT -#endif // MUELU_DESCRIBABLE_DECL_HPP +#endif // MUELU_DESCRIBABLE_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Factory.cpp b/packages/muelu/src/MueCentral/MueLu_Factory.cpp index 64c7032ce5bb..4b099c244891 100644 --- a/packages/muelu/src/MueCentral/MueLu_Factory.cpp +++ b/packages/muelu/src/MueCentral/MueLu_Factory.cpp @@ -48,9 +48,9 @@ namespace MueLu { - bool Factory::timerSync_ = false; +bool Factory::timerSync_ = false; #ifdef HAVE_MUELU_DEBUG - Factory::multipleCallCheckEnum Factory::multipleCallCheckGlobal_ = ENABLED; +Factory::multipleCallCheckEnum Factory::multipleCallCheckGlobal_ = ENABLED; #endif -} // namespace MueLu +} // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_Factory.hpp b/packages/muelu/src/MueCentral/MueLu_Factory.hpp index 0be93d76bdff..ef38498019c1 100644 --- a/packages/muelu/src/MueCentral/MueLu_Factory.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Factory.hpp @@ -47,14 +47,14 @@ #define MUELU_FACTORY_HPP #include -#include // for _Deque_iterator, operator!= -#include // for operator<<, etc -#include "Teuchos_ENull.hpp" // for ENull::null +#include // for _Deque_iterator, operator!= +#include // for operator<<, etc +#include "Teuchos_ENull.hpp" // for ENull::null #include "Teuchos_FilteredIterator.hpp" // for FilteredIterator, etc -#include "Teuchos_ParameterEntry.hpp" // for ParameterEntry -#include "Teuchos_ParameterList.hpp" // for ParameterList, etc -#include "Teuchos_RCPDecl.hpp" // for RCP -#include "Teuchos_RCPNode.hpp" // for operator<< +#include "Teuchos_ParameterEntry.hpp" // for ParameterEntry +#include "Teuchos_ParameterList.hpp" // for ParameterList, etc +#include "Teuchos_RCPDecl.hpp" // for RCP +#include "Teuchos_RCPNode.hpp" // for 
operator<< #include "Teuchos_StringIndexedOrderedValueObjectContainer.hpp" #include "Teuchos_RCP.hpp" @@ -66,171 +66,170 @@ namespace MueLu { - class Factory : public FactoryBase, public FactoryAcceptor, public ParameterListAcceptorImpl { +class Factory : public FactoryBase, public FactoryAcceptor, public ParameterListAcceptorImpl { + public: + //@{ Constructors/Destructors. - public: - //@{ Constructors/Destructors. - - //! Constructor. - Factory() + //! Constructor. + Factory() #ifdef HAVE_MUELU_DEBUG - : multipleCallCheck_(FIRSTCALL), lastLevelID_(-1) + : multipleCallCheck_(FIRSTCALL) + , lastLevelID_(-1) #endif - { } - - //! Destructor. - virtual ~Factory() { } - //@} - - //@{ - //! Configuration - - //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. - virtual void SetFactory(const std::string& varName, const RCP& factory) { - RCP f = factory; - SetParameter(varName, ParameterEntry(f)); // parameter validation done in ParameterListAcceptorImpl + { + } + + //! Destructor. + virtual ~Factory() {} + //@} + + //@{ + //! Configuration + + //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. + virtual void SetFactory(const std::string& varName, const RCP& factory) { + RCP f = factory; + SetParameter(varName, ParameterEntry(f)); // parameter validation done in ParameterListAcceptorImpl + } + + //! Default implementation of FactoryAcceptor::GetFactory() + const RCP GetFactory(const std::string& varName) const { + // Special treatment for "NoFactory" + if (varName == "NoFactory") + return MueLu::NoFactory::getRCP(); + + if (!GetParameterList().isParameter(varName) && GetValidParameterList() == Teuchos::null) { + // If the parameter is not on the list and there is not validator, the defaults values for 'varName' is not set. 
+ // Failback by using directly the FactoryManager + // NOTE: call to GetValidParameterList() can be costly for classes that validate parameters. + // But it get called only (lazy '&&' operator) if the parameter 'varName' is not on the paramlist and + // the parameter 'varName' is always on the list when validator is present and 'varName' is valid (at least the default value is set). + return Teuchos::null; } - //! Default implementation of FactoryAcceptor::GetFactory() - const RCP GetFactory(const std::string& varName) const { - - // Special treatment for "NoFactory" - if (varName == "NoFactory") - return MueLu::NoFactory::getRCP(); - - if (!GetParameterList().isParameter(varName)&& GetValidParameterList() == Teuchos::null) { - // If the parameter is not on the list and there is not validator, the defaults values for 'varName' is not set. - // Failback by using directly the FactoryManager - // NOTE: call to GetValidParameterList() can be costly for classes that validate parameters. - // But it get called only (lazy '&&' operator) if the parameter 'varName' is not on the paramlist and - // the parameter 'varName' is always on the list when validator is present and 'varName' is valid (at least the default value is set). 
- return Teuchos::null; - } - - return GetParameterList().get< RCP >(varName); + return GetParameterList().get >(varName); + } + + RCP RemoveFactoriesFromList(const ParameterList& list) const { + RCP paramList = rcp(new ParameterList(list)); + // Remove FactoryBase entries from the list + // The solution would be much more elegant if ParameterList support std::list like operations + // In that case, we could simply write: + // for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) + // if (paramList.isType >(it->first)) + // it = paramList.erase(it); + // else + // it++; + ParameterList::ConstIterator it = paramList->begin(); + while (it != paramList->end()) { + it = paramList->begin(); + + for (; it != paramList->end(); it++) + if (paramList->isType >(it->first)) + paramList->remove(it->first); } - - RCP RemoveFactoriesFromList(const ParameterList& list) const { - RCP paramList = rcp(new ParameterList(list)); - // Remove FactoryBase entries from the list - // The solution would be much more elegant if ParameterList support std::list like operations - // In that case, we could simply write: - // for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) - // if (paramList.isType >(it->first)) - // it = paramList.erase(it); - // else - // it++; - ParameterList::ConstIterator it = paramList->begin(); - while (it != paramList->end()) { - it = paramList->begin(); - - for (; it != paramList->end(); it++) - if (paramList->isType >(it->first)) - paramList->remove(it->first); - } - return paramList; - } - - // SetParameterList(...); - - // GetParameterList(...); - - //@} - - virtual RCP GetValidParameterList() const { - return Teuchos::null; // Teuchos::null == GetValidParameterList() not implemented == skip validation and no default values (dangerous) - } - - protected: - - void Input(Level& level, const std::string& varName) const { - level.DeclareInput(varName, GetFactory(varName).get(), this); - } - // Similar 
to the other Input, but we have an alias (varParamName) to the generated data name (varName) - void Input(Level& level, const std::string& varName, const std::string& varParamName) const { - level.DeclareInput(varName, GetFactory(varParamName).get(), this); - } - - template - T Get(Level& level, const std::string& varName) const { - return level.Get(varName, GetFactory(varName).get()); - } - // Similar to the other Get, but we have an alias (varParamName) to the generated data name (varName) - template - T Get(Level& level, const std::string& varName, const std::string& varParamName) const { - return level.Get(varName, GetFactory(varParamName).get()); - } - - template - void Set(Level& level, const std::string& varName, const T& data) const { - return level.Set(varName, data, this); - } - - template - bool IsType(Level& level, const std::string& varName) const { - return level.IsType(varName, GetFactory(varName).get()); - } - - bool IsAvailable(Level& level, const std::string& varName) const { - return level.IsAvailable(varName, GetFactory(varName).get()); - } - - public: - static void EnableTimerSync() { timerSync_ = true; } - static void DisableTimerSync() { timerSync_ = false; } - - protected: - static bool timerSync_; + return paramList; + } + + // SetParameterList(...); + + // GetParameterList(...); + + //@} + + virtual RCP GetValidParameterList() const { + return Teuchos::null; // Teuchos::null == GetValidParameterList() not implemented == skip validation and no default values (dangerous) + } + + protected: + void Input(Level& level, const std::string& varName) const { + level.DeclareInput(varName, GetFactory(varName).get(), this); + } + // Similar to the other Input, but we have an alias (varParamName) to the generated data name (varName) + void Input(Level& level, const std::string& varName, const std::string& varParamName) const { + level.DeclareInput(varName, GetFactory(varParamName).get(), this); + } + + template + T Get(Level& level, const std::string& 
varName) const { + return level.Get(varName, GetFactory(varName).get()); + } + // Similar to the other Get, but we have an alias (varParamName) to the generated data name (varName) + template + T Get(Level& level, const std::string& varName, const std::string& varParamName) const { + return level.Get(varName, GetFactory(varParamName).get()); + } + + template + void Set(Level& level, const std::string& varName, const T& data) const { + return level.Set(varName, data, this); + } + + template + bool IsType(Level& level, const std::string& varName) const { + return level.IsType(varName, GetFactory(varName).get()); + } + + bool IsAvailable(Level& level, const std::string& varName) const { + return level.IsAvailable(varName, GetFactory(varName).get()); + } + + public: + static void EnableTimerSync() { timerSync_ = true; } + static void DisableTimerSync() { timerSync_ = false; } + + protected: + static bool timerSync_; #ifdef HAVE_MUELU_DEBUG - public: - enum multipleCallCheckEnum { ENABLED, DISABLED, FIRSTCALL }; - - void EnableMultipleCallCheck() const { multipleCallCheck_ = ENABLED; } - void DisableMultipleCallCheck() const { multipleCallCheck_ = DISABLED; } - void ResetDebugData() const { - if (multipleCallCheck_ == FIRSTCALL && lastLevelID_ == -1) - return; - - multipleCallCheck_ = FIRSTCALL; - lastLevelID_ = -1; - - const ParameterList& paramList = GetParameterList(); - - // We cannot use just FactoryManager to specify which factories call ResetDebugData(). - // The problem is that some factories are not present in the manager, but - // instead are only accessible through a parameter list of some factory. - // For instance, FilteredAFactory is only accessible from SaPFactory but - // nowhere else. So we miss those, and do not reset the data, resulting - // in problems. - // Therefore, for each factory we need to go through its dependent - // factories, and call reset on them. 
- for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) - if (paramList.isType >(it->first)) { - RCP fact = rcp_dynamic_cast(paramList.get >(it->first)); - if (fact != Teuchos::null && fact != NoFactory::getRCP()) - fact->ResetDebugData(); - } - } + public: + enum multipleCallCheckEnum{ENABLED, DISABLED, FIRSTCALL}; + + void EnableMultipleCallCheck() const { multipleCallCheck_ = ENABLED; } + void DisableMultipleCallCheck() const { multipleCallCheck_ = DISABLED; } + void ResetDebugData() const { + if (multipleCallCheck_ == FIRSTCALL && lastLevelID_ == -1) + return; + + multipleCallCheck_ = FIRSTCALL; + lastLevelID_ = -1; + + const ParameterList& paramList = GetParameterList(); + + // We cannot use just FactoryManager to specify which factories call ResetDebugData(). + // The problem is that some factories are not present in the manager, but + // instead are only accessible through a parameter list of some factory. + // For instance, FilteredAFactory is only accessible from SaPFactory but + // nowhere else. So we miss those, and do not reset the data, resulting + // in problems. + // Therefore, for each factory we need to go through its dependent + // factories, and call reset on them. 
+ for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) + if (paramList.isType >(it->first)) { + RCP fact = rcp_dynamic_cast(paramList.get >(it->first)); + if (fact != Teuchos::null && fact != NoFactory::getRCP()) + fact->ResetDebugData(); + } + } - static void EnableMultipleCheckGlobally() { multipleCallCheckGlobal_ = ENABLED; } - static void DisableMultipleCheckGlobally() { multipleCallCheckGlobal_ = DISABLED; } + static void EnableMultipleCheckGlobally() { multipleCallCheckGlobal_ = ENABLED; } + static void DisableMultipleCheckGlobally() { multipleCallCheckGlobal_ = DISABLED; } - protected: - mutable multipleCallCheckEnum multipleCallCheck_; - static multipleCallCheckEnum multipleCallCheckGlobal_; - mutable int lastLevelID_; + protected: + mutable multipleCallCheckEnum multipleCallCheck_; + static multipleCallCheckEnum multipleCallCheckGlobal_; + mutable int lastLevelID_; #else - public: - void EnableMultipleCallCheck() const { } - void DisableMultipleCallCheck() const { } - void ResetDebugData() const { } - static void EnableMultipleCheckGlobally() { } - static void DisableMultipleCheckGlobally() { } + public: + void EnableMultipleCallCheck() const {} + void DisableMultipleCallCheck() const {} + void ResetDebugData() const {} + static void EnableMultipleCheckGlobally() {} + static void DisableMultipleCheckGlobally() {} #endif - }; //class Factory +}; // class Factory -} //namespace MueLu +} // namespace MueLu #define MUELU_FACTORY_SHORT -#endif //ifndef MUELU_FACTORY_HPP +#endif // ifndef MUELU_FACTORY_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp index efe14effc27a..4a23f4e6dce6 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp @@ -54,29 +54,27 @@ namespace MueLu { - class FactoryAcceptor { +class FactoryAcceptor { + public: + virtual ~FactoryAcceptor() {} - 
public: + //@{ + //! Configuration - virtual ~FactoryAcceptor() { } + //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. + virtual void SetFactory(const std::string& varName, const RCP& factory) = 0; - //@{ - //! Configuration + virtual const RCP GetFactory(const std::string& varName) const = 0; - //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. - virtual void SetFactory(const std::string & varName, const RCP & factory) = 0; + // SetParameterList(...); - virtual const RCP GetFactory(const std::string & varName) const = 0; + // GetParameterList(...); - // SetParameterList(...); + //@} - // GetParameterList(...); +}; // class FactoryAcceptor - //@} - - }; //class FactoryAcceptor - -} //namespace MueLu +} // namespace MueLu #define MUELU_FACTORYACCEPTOR_SHORT -#endif //ifndef MUELU_FACTORYACCEPTOR_HPP +#endif // ifndef MUELU_FACTORYACCEPTOR_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp b/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp index 07c413c94e8f..ab5d21912b59 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp @@ -48,9 +48,9 @@ namespace MueLu { - int FactoryBase::GenerateUniqueId() { - static int i = 0; - return i++; - } +int FactoryBase::GenerateUniqueId() { + static int i = 0; + return i++; +} -} // namespace MueLu +} // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp index b962d7f6f510..13e422fa3400 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp @@ -52,54 +52,51 @@ namespace MueLu { - /*! - @class FactoryBase - @brief Base class for factories (e.g., R, P, and A_coarse). 
- @ingroup MueLuBaseClasses - */ - class FactoryBase : public virtual BaseClass { - - public: - //@{ Constructors/Destructors. - - //! Constructor. - FactoryBase() - : id_(FactoryBase::GenerateUniqueId()) - { } - - //! Destructor. - virtual ~FactoryBase() { } - //@} +/*! + @class FactoryBase + @brief Base class for factories (e.g., R, P, and A_coarse). + @ingroup MueLuBaseClasses +*/ +class FactoryBase : public virtual BaseClass { + public: + //@{ Constructors/Destructors. - //@{ - //! @name Build methods. + //! Constructor. + FactoryBase() + : id_(FactoryBase::GenerateUniqueId()) {} - virtual void CallBuild(Level & requestedLevel) const = 0; + //! Destructor. + virtual ~FactoryBase() {} + //@} - virtual void CallDeclareInput(Level & requestedLevel) const = 0; - //@} + //@{ + //! @name Build methods. + + virtual void CallBuild(Level& requestedLevel) const = 0; - //@{ - //! @name Access factory properties + virtual void CallDeclareInput(Level& requestedLevel) const = 0; + //@} - /// return unique factory id - int GetID() const { return id_; }; + //@{ + //! @name Access factory properties + + /// return unique factory id + int GetID() const { return id_; }; //@} #ifdef HAVE_MUELU_DEBUG - virtual void ResetDebugData() const = 0; + virtual void ResetDebugData() const = 0; #endif - private: - - static int GenerateUniqueId(); + private: + static int GenerateUniqueId(); - const int id_; + const int id_; - }; //class FactoryBase +}; // class FactoryBase -} //namespace MueLu +} // namespace MueLu #define MUELU_FACTORYBASE_SHORT -#endif //ifndef MUELU_FACTORYBASE_HPP +#endif // ifndef MUELU_FACTORYBASE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp index 90b2c8f86089..742cd5c7c6ee 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp @@ -55,57 +55,57 @@ namespace MueLu { - /*! 
- @class FactoryManagerBase - @brief Class that provides default factories within Needs class. - @ingroup MueLuBaseClasses - */ - class FactoryManagerBase : public BaseClass { +/*! + @class FactoryManagerBase + @brief Class that provides default factories within Needs class. + @ingroup MueLuBaseClasses +*/ +class FactoryManagerBase : public BaseClass { + public: + //@{ Constructors/Destructors. + FactoryManagerBase() + : bIgnoreUserData_(false) {} - public: - //@{ Constructors/Destructors. - FactoryManagerBase() : bIgnoreUserData_(false) { } + //! Destructor. + virtual ~FactoryManagerBase() {} - //! Destructor. - virtual ~FactoryManagerBase() { } + //@} - //@} + //@{ Get/Set functions. - //@{ Get/Set functions. + //! Get + // Return ref because user also give ref to the Hierarchy. + const virtual RCP GetFactory(const std::string& varName) const = 0; + //@} - //! Get - // Return ref because user also give ref to the Hierarchy. - const virtual RCP GetFactory(const std::string& varName) const = 0; - //@} + //! Check + // Return true if Factory associated with varName is registered + virtual bool hasFactory(const std::string& varName) const = 0; - //! Check - // Return true if Factory associated with varName is registered - virtual bool hasFactory(const std::string& varName) const = 0; - - // Free temporarily hold data at the end of Hierarchy::Setup() - // This method is const because the clean concerns only mutable data. - virtual void Clean() const { } // TODO: should be used inside of MueLu::Hierarchy + // Free temporarily hold data at the end of Hierarchy::Setup() + // This method is const because the clean concerns only mutable data. + virtual void Clean() const {} // TODO: should be used inside of MueLu::Hierarchy #ifdef HAVE_MUELU_DEBUG - virtual void ResetDebugData() const = 0; + virtual void ResetDebugData() const = 0; #endif - //! get IgnoreUserData flag - bool IgnoreUserData() const { return bIgnoreUserData_; } + //! 
get IgnoreUserData flag + bool IgnoreUserData() const { return bIgnoreUserData_; } - //! set IgnoreUserData flag - void SetIgnoreUserData(bool bIgnoreUserData = false) { bIgnoreUserData_ = bIgnoreUserData; } + //! set IgnoreUserData flag + void SetIgnoreUserData(bool bIgnoreUserData = false) { bIgnoreUserData_ = bIgnoreUserData; } - private: - //! boolean flag that controls behaviour of Level::GetFactory - //! if bIgnoreUserData == true, the Level::GetFactory function always asks the Factory manager for a valid factory given a variable name - //! if bIgnoreUserData == false, the Level::GetFactory prefers user-provided data for a variable name if available. Otherwise the factory manager is asked for a valid factory - //! default: bIgnoreUserData = false; - bool bIgnoreUserData_; + private: + //! boolean flag that controls behaviour of Level::GetFactory + //! if bIgnoreUserData == true, the Level::GetFactory function always asks the Factory manager for a valid factory given a variable name + //! if bIgnoreUserData == false, the Level::GetFactory prefers user-provided data for a variable name if available. Otherwise the factory manager is asked for a valid factory + //! 
default: bIgnoreUserData = false; + bool bIgnoreUserData_; - }; // class FactoryManagerBase +}; // class FactoryManagerBase -} // namespace MueLu +} // namespace MueLu #define MUELU_FACTORYMANAGERBASE_SHORT -#endif //ifndef MUELU_FACTORYMANAGERBASE_HPP +#endif // ifndef MUELU_FACTORYMANAGERBASE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp index 2f4618bb44c0..38348fcff50e 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp @@ -75,7 +75,6 @@ #include "MueLu_InterfaceMappingTransferFactory_fwd.hpp" #include "MueLu_InterfaceAggregationFactory_fwd.hpp" - #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" @@ -84,143 +83,139 @@ namespace MueLu { - /*! - @class FactoryManager class. - @brief This class specifies the default factory that should generate some data on a Level if the data does not exist and - the generating factory has not been specified. - - Consider the following example. - - @code - RCP Afact; - Level currentLevel; - RCP thisLevelA; - thisLevelA = currentLevel.Get("A", Afact.get()); - @endcode - - @todo If Afact is null (actually, Teuchos::null), then the FactoryManager associated with currentLevel will determine whether a default factory has - been specified for creating A. If "yes", then that factory will be called, A will be stored in currentLevel, and an RCP will be returned by - the Get call. If "no", then the FactoryManager will throw an exception indicating that it does not know how to generate A. - */ - - template - class FactoryManager : public FactoryManagerBase { +/*! + @class FactoryManager class. + @brief This class specifies the default factory that should generate some data on a Level if the data does not exist and + the generating factory has not been specified. 
+ + Consider the following example. + + @code + RCP Afact; + Level currentLevel; + RCP thisLevelA; + thisLevelA = currentLevel.Get("A", Afact.get()); + @endcode + + @todo If Afact is null (actually, Teuchos::null), then the FactoryManager associated with currentLevel will determine whether a default factory has + been specified for creating A. If "yes", then that factory will be called, A will be stored in currentLevel, and an RCP will be returned by + the Get call. If "no", then the FactoryManager will throw an exception indicating that it does not know how to generate A. +*/ + +template +class FactoryManager : public FactoryManagerBase { #undef MUELU_FACTORYMANAGER_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructor/Destructors - //@{ - - //! @brief Constructor. - FactoryManager() { - SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) - useKokkos_ = !Node::is_serial; - } + public: + //! @name Constructor/Destructors + //@{ - //! Constructor used by HierarchyFactory (temporary, will be removed) - FactoryManager(const std::map >& factoryTable) { - factoryTable_ = factoryTable; - SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) //TODO: use parent class constructor instead - useKokkos_ = !Node::is_serial; - } + //! @brief Constructor. + FactoryManager() { + SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) + useKokkos_ = !Node::is_serial; + } - //! Destructor. - virtual ~FactoryManager() { } + //! Constructor used by HierarchyFactory (temporary, will be removed) + FactoryManager(const std::map >& factoryTable) { + factoryTable_ = factoryTable; + SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) //TODO: use parent class constructor instead + useKokkos_ = !Node::is_serial; + } - //@} + //! Destructor. + virtual ~FactoryManager() {} - //! @name Get/Set functions. - //@{ + //@} - /*! @brief Set Factory + //! 
@name Get/Set functions. + //@{ - Register the factory that should generate data if said factory is not specified in the request. + /*! @brief Set Factory - @param[in] name of variable - @param[in] factory that generates the data - */ - void SetFactory(const std::string & varName, const RCP& factory); + Register the factory that should generate data if said factory is not specified in the request. - /*! @brief Get factory associated with a particular data name. + @param[in] name of variable + @param[in] factory that generates the data + */ + void SetFactory(const std::string& varName, const RCP& factory); - @param[in] varName name of variable. + /*! @brief Get factory associated with a particular data name. - */ - const RCP GetFactory(const std::string& varName) const; + @param[in] varName name of variable. - /*! @brief Get factory associated with a particular data name (NONCONST version) + */ + const RCP GetFactory(const std::string& varName) const; - @param[in] varName name of variable. + /*! @brief Get factory associated with a particular data name (NONCONST version) - */ - const RCP GetFactoryNonConst(const std::string& varName); + @param[in] varName name of variable. - //! Check - // Return true if Factory associated with varName is registered - bool hasFactory(const std::string& varName) const; + */ + const RCP GetFactoryNonConst(const std::string& varName); + //! Check + // Return true if Factory associated with varName is registered + bool hasFactory(const std::string& varName) const; - //! - const RCP GetDefaultFactory(const std::string& varName) const; + //! 
+ const RCP GetDefaultFactory(const std::string& varName) const; - void SetKokkosRefactor(const bool useKokkos) { - useKokkos_ = useKokkos; - } + void SetKokkosRefactor(const bool useKokkos) { + useKokkos_ = useKokkos; + } - bool GetKokkosRefactor() const { return useKokkos_; } + bool GetKokkosRefactor() const { return useKokkos_; } - //@} + //@} - void Clean() const { defaultFactoryTable_.clear(); } + void Clean() const { defaultFactoryTable_.clear(); } #ifdef HAVE_MUELU_DEBUG - void ResetDebugData() const; + void ResetDebugData() const; #endif - void Print() const; - - private: + void Print() const; - //! @name Helper functions - //@{ + private: + //! @name Helper functions + //@{ - /*! Add a factory to the default factory list and return it. This helper function is used by GetDefaultFactory() + /*! Add a factory to the default factory list and return it. This helper function is used by GetDefaultFactory() - @todo TODO factory->setObjectLabel("Default " + varName + "Factory"); - */ + @todo TODO factory->setObjectLabel("Default " + varName + "Factory"); + */ - const RCP SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const; - //@} + const RCP SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const; + //@} - /*! @brief User-defined factories. - * - * User may overwrite default behaviour. The user provided factories are stored in a separate table. When we try to determine - * which factory generates the data, this table is searched first. + /*! @brief User-defined factories. + * + * User may overwrite default behaviour. The user provided factories are stored in a separate table. When we try to determine + * which factory generates the data, this table is searched first. - Note: we distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. 
- */ - std::map > factoryTable_; + Note: we distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. + */ + std::map > factoryTable_; - /*! @brief Table that holds default factories. + /*! @brief Table that holds default factories. - -# We distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. - -# defaultFactoryTable_ is mutable because default factories are only added to the list when they are requested - to avoid allocation of unused factories. - */ - mutable - std::map > defaultFactoryTable_; + -# We distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. + -# defaultFactoryTable_ is mutable because default factories are only added to the list when they are requested + to avoid allocation of unused factories. + */ + mutable std::map > defaultFactoryTable_; - //! Whether or not to use kokkos factories. - bool useKokkos_; + //! Whether or not to use kokkos factories. + bool useKokkos_; - }; // class +}; // class -} // namespace MueLu +} // namespace MueLu #define MUELU_FACTORYMANAGER_SHORT -#endif // MUELU_FACTORYMANAGER_DECL_HPP +#endif // MUELU_FACTORYMANAGER_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp index f7004c043c69..6da6298d54dc 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp @@ -85,229 +85,227 @@ #include "MueLu_FactoryManager_decl.hpp" - namespace MueLu { -#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ - (!useKokkos_) ? 
SetAndReturnDefaultFactory(varName, rcp(new oldFactory())) : \ - SetAndReturnDefaultFactory(varName, rcp(new newFactory())); - - template - void FactoryManager::SetFactory(const std::string& varName, const RCP& factory) { - factoryTable_[varName] = factory; - } +#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ + (!useKokkos_) ? SetAndReturnDefaultFactory(varName, rcp(new oldFactory())) : SetAndReturnDefaultFactory(varName, rcp(new newFactory())); - template - const RCP FactoryManager::GetFactory(const std::string& varName) const { - if (factoryTable_.count(varName)) { - // Search user provided factories - return factoryTable_.find(varName)->second; - } +template +void FactoryManager::SetFactory(const std::string& varName, const RCP& factory) { + factoryTable_[varName] = factory; +} - // Search/create default factory for this name - return GetDefaultFactory(varName); +template +const RCP FactoryManager::GetFactory(const std::string& varName) const { + if (factoryTable_.count(varName)) { + // Search user provided factories + return factoryTable_.find(varName)->second; } - template - const RCP FactoryManager::GetFactoryNonConst(const std::string& varName) { - return Teuchos::rcp_const_cast(GetFactory(varName)); - } + // Search/create default factory for this name + return GetDefaultFactory(varName); +} + +template +const RCP FactoryManager::GetFactoryNonConst(const std::string& varName) { + return Teuchos::rcp_const_cast(GetFactory(varName)); +} + +template +bool FactoryManager::hasFactory(const std::string& varName) const { + if (factoryTable_.count(varName)) return true; + return false; +} + +template +const RCP FactoryManager::GetDefaultFactory(const std::string& varName) const { + if (defaultFactoryTable_.count(varName)) { + // The factory for this name was already created (possibly, for previous level, if we reuse factory manager) + return defaultFactoryTable_.find(varName)->second; + + } else { + // No factory was created for this name, but we may 
know which one to create + if (varName == "A") return SetAndReturnDefaultFactory(varName, rcp(new RAPFactory())); + if (varName == "Ainv") return SetAndReturnDefaultFactory(varName, rcp(new InverseApproximationFactory())); + if (varName == "RAP Pattern") return GetFactory("A"); + if (varName == "AP Pattern") return GetFactory("A"); + if (varName == "Ptent") return MUELU_KOKKOS_FACTORY(varName, TentativePFactory, TentativePFactory_kokkos); + if (varName == "P") { + // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" + RCP factory; + if (useKokkos_) + factory = rcp(new SaPFactory_kokkos()); + else + factory = rcp(new SaPFactory()); + factory->SetFactory("P", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, factory); + } + if (varName == "Nullspace") { + // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" + RCP factory; + if (useKokkos_) + factory = rcp(new NullspaceFactory_kokkos()); + else + factory = rcp(new NullspaceFactory()); + factory->SetFactory("Nullspace", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, factory); + } + if (varName == "Scaled Nullspace") return SetAndReturnDefaultFactory(varName, rcp(new ScaledNullspaceFactory())); - template - bool FactoryManager::hasFactory(const std::string& varName) const { - if (factoryTable_.count(varName)) return true; - return false; - } + if (varName == "Coordinates") return GetFactory("Ptent"); + if (varName == "Node Comm") return GetFactory("Ptent"); - template - const RCP FactoryManager::GetDefaultFactory(const std::string& varName) const { - if (defaultFactoryTable_.count(varName)) { - // The factory for this name was already created (possibly, for previous level, if we reuse factory manager) - return defaultFactoryTable_.find(varName)->second; - - } else { - // No factory was created for this name, but we may know which one to create - if (varName == "A") return 
SetAndReturnDefaultFactory(varName, rcp(new RAPFactory())); - if (varName == "Ainv") return SetAndReturnDefaultFactory(varName, rcp(new InverseApproximationFactory())); - if (varName == "RAP Pattern") return GetFactory("A"); - if (varName == "AP Pattern") return GetFactory("A"); - if (varName == "Ptent") return MUELU_KOKKOS_FACTORY(varName, TentativePFactory, TentativePFactory_kokkos); - if (varName == "P") { - // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" - RCP factory; - if (useKokkos_) - factory = rcp(new SaPFactory_kokkos()); - else - factory = rcp(new SaPFactory()); - factory->SetFactory("P", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, factory); - } - if (varName == "Nullspace") { - // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" - RCP factory; - if (useKokkos_) - factory = rcp(new NullspaceFactory_kokkos()); - else - factory = rcp(new NullspaceFactory()); - factory->SetFactory("Nullspace", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, factory); - } - if (varName == "Scaled Nullspace") return SetAndReturnDefaultFactory(varName, rcp(new ScaledNullspaceFactory())); - - if (varName == "Coordinates") return GetFactory("Ptent"); - if (varName == "Node Comm") return GetFactory("Ptent"); - - if (varName == "R") return SetAndReturnDefaultFactory(varName, rcp(new TransPFactory())); - if (varName == "RfromPfactory") return GetFactory("P"); + if (varName == "R") return SetAndReturnDefaultFactory(varName, rcp(new TransPFactory())); + if (varName == "RfromPfactory") return GetFactory("P"); #if defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MPI) - if (varName == "Partition") return SetAndReturnDefaultFactory(varName, rcp(new ZoltanInterface())); -#endif //ifdef HAVE_MPI + if (varName == "Partition") return SetAndReturnDefaultFactory(varName, rcp(new ZoltanInterface())); +#endif // ifdef HAVE_MPI - if (varName == "Importer") { + if (varName == 
"Importer") { #ifdef HAVE_MPI - return SetAndReturnDefaultFactory(varName, rcp(new RepartitionFactory())); + return SetAndReturnDefaultFactory(varName, rcp(new RepartitionFactory())); #else - return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); + return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); #endif - } - if (varName == "number of partitions") { + } + if (varName == "number of partitions") { #ifdef HAVE_MPI - return SetAndReturnDefaultFactory(varName, rcp(new RepartitionHeuristicFactory())); + return SetAndReturnDefaultFactory(varName, rcp(new RepartitionHeuristicFactory())); #else - return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); -#endif - } - if (varName == "repartition: heuristic target rows per process") return GetFactory("number of partitions"); - - if (varName == "Graph") return MUELU_KOKKOS_FACTORY(varName, CoalesceDropFactory, CoalesceDropFactory_kokkos); - if (varName == "UnAmalgamationInfo") return SetAndReturnDefaultFactory(varName, rcp(new AmalgamationFactory())); - if (varName == "Aggregates") return MUELU_KOKKOS_FACTORY(varName, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); - if (varName == "AggregateQualities") return SetAndReturnDefaultFactory(varName, rcp(new AggregateQualityEstimateFactory())); - if (varName == "CoarseMap") return SetAndReturnDefaultFactory(varName, rcp(new CoarseMapFactory())); - if (varName == "DofsPerNode") return GetFactory("Graph"); - if (varName == "Filtering") return GetFactory("Graph"); - if (varName == "BlockNumber") return SetAndReturnDefaultFactory(varName, rcp(new InitialBlockNumberFactory())); - if (varName == "LineDetection_VertLineIds") return SetAndReturnDefaultFactory(varName, rcp(new LineDetectionFactory())); - if (varName == "LineDetection_Layers") return GetFactory("LineDetection_VertLineIds"); - if (varName == "CoarseNumZLayers") return GetFactory("LineDetection_VertLineIds"); - - // Structured - if (varName == "structuredInterpolationOrder") 
return SetAndReturnDefaultFactory(varName, rcp(new StructuredAggregationFactory())); - - // Non-Galerkin - if (varName == "K") return GetFactory("A"); - if (varName == "M") return GetFactory("A"); - if (varName == "Mdiag") return GetFactory("A"); - if (varName == "cfl-based shift array") return GetFactory("A"); - - // Same factory for both Pre and Post Smoother. Factory for key "Smoother" can be set by users. - if (varName == "PreSmoother") return GetFactory("Smoother"); - if (varName == "PostSmoother") return GetFactory("Smoother"); - - if (varName == "Ppattern") { - RCP PpFact = rcp(new PatternFactory); - PpFact->SetFactory("P", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, PpFact); - } - if (varName == "Constraint") return SetAndReturnDefaultFactory(varName, rcp(new ConstraintFactory())); - - if (varName == "Smoother") { - Teuchos::ParameterList smootherParamList; - smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); - smootherParamList.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); - smootherParamList.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); - return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new TrilinosSmoother("RELAXATION", smootherParamList))))); - } - if (varName == "CoarseSolver") return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null))); - - if (varName == "DualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceMappingTransferFactory())); - if (varName == "CoarseDualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceAggregationFactory())); -#ifdef HAVE_MUELU_INTREPID2 - // If we're asking for it, find who made P - if (varName == "pcoarsen: element to node map") return GetFactory("P"); + return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); #endif - - // NOTE: These are user data, but we might want to print them, so they need a default 
factory - if (varName == "Pnodal") return NoFactory::getRCP(); - if (varName == "NodeMatrix") return NoFactory::getRCP(); - if (varName == "NodeAggMatrix") return NoFactory::getRCP(); - - - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::FactoryManager::GetDefaultFactory(): No default factory available for building '" + varName + "'."); } - } - - template - const RCP FactoryManager::SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const { - TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null"); + if (varName == "repartition: heuristic target rows per process") return GetFactory("number of partitions"); + + if (varName == "Graph") return MUELU_KOKKOS_FACTORY(varName, CoalesceDropFactory, CoalesceDropFactory_kokkos); + if (varName == "UnAmalgamationInfo") return SetAndReturnDefaultFactory(varName, rcp(new AmalgamationFactory())); + if (varName == "Aggregates") return MUELU_KOKKOS_FACTORY(varName, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); + if (varName == "AggregateQualities") return SetAndReturnDefaultFactory(varName, rcp(new AggregateQualityEstimateFactory())); + if (varName == "CoarseMap") return SetAndReturnDefaultFactory(varName, rcp(new CoarseMapFactory())); + if (varName == "DofsPerNode") return GetFactory("Graph"); + if (varName == "Filtering") return GetFactory("Graph"); + if (varName == "BlockNumber") return SetAndReturnDefaultFactory(varName, rcp(new InitialBlockNumberFactory())); + if (varName == "LineDetection_VertLineIds") return SetAndReturnDefaultFactory(varName, rcp(new LineDetectionFactory())); + if (varName == "LineDetection_Layers") return GetFactory("LineDetection_VertLineIds"); + if (varName == "CoarseNumZLayers") return GetFactory("LineDetection_VertLineIds"); + + // Structured + if (varName == "structuredInterpolationOrder") return SetAndReturnDefaultFactory(varName, rcp(new 
StructuredAggregationFactory())); + + // Non-Galerkin + if (varName == "K") return GetFactory("A"); + if (varName == "M") return GetFactory("A"); + if (varName == "Mdiag") return GetFactory("A"); + if (varName == "cfl-based shift array") return GetFactory("A"); + + // Same factory for both Pre and Post Smoother. Factory for key "Smoother" can be set by users. + if (varName == "PreSmoother") return GetFactory("Smoother"); + if (varName == "PostSmoother") return GetFactory("Smoother"); + + if (varName == "Ppattern") { + RCP PpFact = rcp(new PatternFactory); + PpFact->SetFactory("P", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, PpFact); + } + if (varName == "Constraint") return SetAndReturnDefaultFactory(varName, rcp(new ConstraintFactory())); + + if (varName == "Smoother") { + Teuchos::ParameterList smootherParamList; + smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); + smootherParamList.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); + smootherParamList.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); + return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new TrilinosSmoother("RELAXATION", smootherParamList))))); + } + if (varName == "CoarseSolver") return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null))); - GetOStream(Runtime1) << "Using default factory (" << factory->ShortClassName() <<"["<GetID()<<"]) for building '" << varName << "'." 
<< std::endl; + if (varName == "DualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceMappingTransferFactory())); + if (varName == "CoarseDualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceAggregationFactory())); +#ifdef HAVE_MUELU_INTREPID2 + // If we're asking for it, find who made P + if (varName == "pcoarsen: element to node map") return GetFactory("P"); +#endif - defaultFactoryTable_[varName] = factory; + // NOTE: These are user data, but we might want to print them, so they need a default factory + if (varName == "Pnodal") return NoFactory::getRCP(); + if (varName == "NodeMatrix") return NoFactory::getRCP(); + if (varName == "NodeAggMatrix") return NoFactory::getRCP(); - return defaultFactoryTable_[varName]; + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::FactoryManager::GetDefaultFactory(): No default factory available for building '" + varName + "'."); } - - template - void FactoryManager::Print() const { - std::map >::const_iterator it; - Teuchos::FancyOStream& fancy = GetOStream(Debug); - //auto & fancy = std::cout;// For debugging - - - fancy << "Users factory table (factoryTable_):" << std::endl; - for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) { - fancy << " " << it->first << " -> "; - if (it->second.get() == NoFactory::get()) fancy << "NoFactory"; - else if (!it->second.get()) fancy<< "NULL"; - else { - fancy << it->second.get()->ShortClassName()<<"["<second.get()->GetID()<<"]"; +} + +template +const RCP FactoryManager::SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const { + TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null"); + + GetOStream(Runtime1) << "Using default factory (" << factory->ShortClassName() << "[" << factory->GetID() << "]) for building '" << varName << "'." 
<< std::endl; + + defaultFactoryTable_[varName] = factory; + + return defaultFactoryTable_[varName]; +} + +template +void FactoryManager::Print() const { + std::map >::const_iterator it; + Teuchos::FancyOStream& fancy = GetOStream(Debug); + // auto & fancy = std::cout;// For debugging + + fancy << "Users factory table (factoryTable_):" << std::endl; + for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) { + fancy << " " << it->first << " -> "; + if (it->second.get() == NoFactory::get()) + fancy << "NoFactory"; + else if (!it->second.get()) + fancy << "NULL"; + else { + fancy << it->second.get()->ShortClassName() << "[" << it->second.get()->GetID() << "]"; #ifdef HAVE_MUELU_DEBUG - fancy<<"("<second.get()) <<")"; + fancy << "(" << Teuchos::toString(it->second.get()) << ")"; #endif - } - fancy<< std::endl; } + fancy << std::endl; + } - fancy << "Default factory table (defaultFactoryTable_):" << std::endl; - for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) { - fancy << " " << it->first << " -> "; - if (it->second.get() == NoFactory::get()) fancy << "NoFactory"; - else if (!it->second.get()) fancy<< "NULL"; - else { - fancy << it->second.get()->ShortClassName()<<"["<second.get()->GetID()<<"]"; + fancy << "Default factory table (defaultFactoryTable_):" << std::endl; + for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) { + fancy << " " << it->first << " -> "; + if (it->second.get() == NoFactory::get()) + fancy << "NoFactory"; + else if (!it->second.get()) + fancy << "NULL"; + else { + fancy << it->second.get()->ShortClassName() << "[" << it->second.get()->GetID() << "]"; #ifdef HAVE_MUELU_DEBUG - fancy<<"("<second.get()) <<")"; + fancy << "(" << Teuchos::toString(it->second.get()) << ")"; #endif - } - fancy<< std::endl; } - + fancy << std::endl; } +} #ifdef HAVE_MUELU_DEBUG - template - void FactoryManager::ResetDebugData() const { - std::map >::const_iterator it; - - for (it = 
factoryTable_.begin(); it != factoryTable_.end(); it++) - if (!it->second.is_null()) - it->second->ResetDebugData(); - - for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) - if (!it->second.is_null()) - it->second->ResetDebugData(); - } +template +void FactoryManager::ResetDebugData() const { + std::map >::const_iterator it; + + for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) + if (!it->second.is_null()) + it->second->ResetDebugData(); + + for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) + if (!it->second.is_null()) + it->second->ResetDebugData(); +} #endif - #undef MUELU_KOKKOS_FACTORY -} // namespace MueLu +} // namespace MueLu -//TODO: add operator[] -//TODO: should we use a parameterList instead of a std::map? It might be useful to tag which factory have been used and report unused factory. -//TODO: add an option 'NoDefault' to check if we are using any default factory. -//TODO: use Teuchos::ConstNonConstObjectContainer to allow user to modify factories after a GetFactory() +// TODO: add operator[] +// TODO: should we use a parameterList instead of a std::map? It might be useful to tag which factory have been used and report unused factory. +// TODO: add an option 'NoDefault' to check if we are using any default factory. +// TODO: use Teuchos::ConstNonConstObjectContainer to allow user to modify factories after a GetFactory() -#endif // MUELU_FACTORYMANAGER_DEF_HPP +#endif // MUELU_FACTORYMANAGER_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp index 2bfb4b97378b..593f6f69865f 100644 --- a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp @@ -61,84 +61,76 @@ namespace MueLu { - //! An exception safe way to call the method 'Level::SetFactoryManager()' - class SetFactoryManager { - - public: - - //@{ - - /*! 
- @brief Constructor - - Set a given factory manager on a specific level - */ - SetFactoryManager(const RCP & level, const RCP & factoryManager) - : level_(level), prevFactoryManager_(level->GetFactoryManager()) - { - // set new factory manager - level->SetFactoryManager(factoryManager); - } - - //! Destructor. - virtual ~SetFactoryManager() { - // restore previous factory manager - level_->SetFactoryManager(prevFactoryManager_); - } - - //@} - - private: - //! needed to save & restore previous factoryManager - const RCP level_; - const RCP prevFactoryManager_; - }; - - - - - template - class HierarchyUtils { +//! An exception safe way to call the method 'Level::SetFactoryManager()' +class SetFactoryManager { + public: + //@{ + + /*! + @brief Constructor + + Set a given factory manager on a specific level + */ + SetFactoryManager(const RCP& level, const RCP& factoryManager) + : level_(level) + , prevFactoryManager_(level->GetFactoryManager()) { + // set new factory manager + level->SetFactoryManager(factoryManager); + } + + //! Destructor. + virtual ~SetFactoryManager() { + // restore previous factory manager + level_->SetFactoryManager(prevFactoryManager_); + } + + //@} + + private: + //! needed to save & restore previous factoryManager + const RCP level_; + const RCP prevFactoryManager_; +}; + +template +class HierarchyUtils { #undef MUELU_HIERARCHYUTILS_SHORT #include "MueLu_UseShortNames.hpp" - public: - /*! - \brief Add non-serializable data to Hierarchy - - Add non-serializable data given level-specific sublist \c nonSerialList to the Hierarchy \c H. - Calling \c AddLevel() along the way, if necessary. 
- - Non-serializable data to be added: - - Operator "A" - - Prolongator "P" - - Restrictor "R" - - "M" - - "Mdiag" - - "K" - - Nullspace information "Nullspace" - - Coordinate information "Coordinates" - - "Node Comm" - - Primal-to-dual node mapping "DualNodeID2PrimalNodeID" - - "Primal interface DOF map" - - "pcoarsen: element to node map - - This routine is used by the CreateXpetraPreconditioner() routine. - - @param HM Hierarhcy manager - @param H Hierarchy, where non-serializable data needs to be added - @param nonSerialList Parameter list containing non-serializable data - */ - static void AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList); - static void CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType); - }; - - - - -} // namespace MueLu + public: + /*! + \brief Add non-serializable data to Hierarchy + + Add non-serializable data given level-specific sublist \c nonSerialList to the Hierarchy \c H. + Calling \c AddLevel() along the way, if necessary. + + Non-serializable data to be added: + - Operator "A" + - Prolongator "P" + - Restrictor "R" + - "M" + - "Mdiag" + - "K" + - Nullspace information "Nullspace" + - Coordinate information "Coordinates" + - "Node Comm" + - Primal-to-dual node mapping "DualNodeID2PrimalNodeID" + - "Primal interface DOF map" + - "pcoarsen: element to node map + + This routine is used by the CreateXpetraPreconditioner() routine. 
+ + @param HM Hierarhcy manager + @param H Hierarchy, where non-serializable data needs to be added + @param nonSerialList Parameter list containing non-serializable data + */ + static void AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList); + static void CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType); +}; + +} // namespace MueLu #define MUELU_HIERARCHYUTILS_SHORT -#endif // MUELU_HIERARCHYUTILS_DECL_HPP +#endif // MUELU_HIERARCHYUTILS_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp index 6581464aa9ad..6c4defcc4116 100644 --- a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp @@ -55,379 +55,359 @@ #include "MueLu_HierarchyManager.hpp" #include "MueLu_FactoryManager.hpp" -//TODO/FIXME: DeclareInput(, **this**) cannot be used here +// TODO/FIXME: DeclareInput(, **this**) cannot be used here #ifdef HAVE_MUELU_INTREPID2 #include "Kokkos_DynRankView.hpp" #endif namespace MueLu { - // Copy object from one hierarchy to another calling AddNewLevel as appropriate. - template - void HierarchyUtils::CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType) { +// Copy object from one hierarchy to another calling AddNewLevel as appropriate. 
+template +void HierarchyUtils::CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType) { + // add any necessary levels + for (int i = toHierarchy.GetNumLevels(); i < fromHierarchy.GetNumLevels(); i++) + toHierarchy.AddNewLevel(); - // add any necessary levels - for (int i = toHierarchy.GetNumLevels(); i < fromHierarchy.GetNumLevels(); i++) - toHierarchy.AddNewLevel(); - - for (int i = 0; i < fromHierarchy.GetNumLevels(); i++) { - RCP fromLevel = fromHierarchy.GetLevel(i); - RCP toLevel = toHierarchy.GetLevel(i); - - TEUCHOS_TEST_FOR_EXCEPTION(dataType != "RCP" && dataType != "RCP" - , Exceptions::InvalidArgument, - std::string("MueLu::Utils::CopyBetweenHierarchies: unknown data type(") + dataType + ")"); - if (fromLevel->IsAvailable(fromLabel)) { - if (dataType == "RCP" ) { - // Normally, we should only do - // toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); - // The logic below is meant to handle a special case when we - // repartition a processor away, leaving behind a RCP on - // on the level instead of an RCP + for (int i = 0; i < fromHierarchy.GetNumLevels(); i++) { + RCP fromLevel = fromHierarchy.GetLevel(i); + RCP toLevel = toHierarchy.GetLevel(i); - auto tempOp = fromLevel->Get >(fromLabel); - auto tempMatrix = rcp_dynamic_cast(tempOp); - if(!tempMatrix.is_null()) toLevel->Set(toLabel,tempMatrix); - else toLevel->Set(toLabel,tempOp); - } - if (dataType == "RCP") { - toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); - } + TEUCHOS_TEST_FOR_EXCEPTION(dataType != "RCP" && dataType != "RCP", Exceptions::InvalidArgument, + std::string("MueLu::Utils::CopyBetweenHierarchies: unknown data type(") + dataType + ")"); + if (fromLevel->IsAvailable(fromLabel)) { + if (dataType == "RCP") { + // Normally, we should only do + // toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); + // The logic below is meant to handle a special case when we + // repartition a processor 
away, leaving behind a RCP on + // on the level instead of an RCP + + auto tempOp = fromLevel->Get>(fromLabel); + auto tempMatrix = rcp_dynamic_cast(tempOp); + if (!tempMatrix.is_null()) + toLevel->Set(toLabel, tempMatrix); + else + toLevel->Set(toLabel, tempOp); + } + if (dataType == "RCP") { + toLevel->Set(toLabel, fromLevel->Get>(fromLabel)); } } } +} - // Adds the following non-serializable data (A,P,R,Nullspace,Coordinates) from level-specific sublist nonSerialList, - // calling AddNewLevel as appropriate. - template - void HierarchyUtils::AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList) { - typedef typename Xpetra::MultiVector::coordinateType, - LocalOrdinal, GlobalOrdinal, Node> realvaluedmultivector_type; +// Adds the following non-serializable data (A,P,R,Nullspace,Coordinates) from level-specific sublist nonSerialList, +// calling AddNewLevel as appropriate. +template +void HierarchyUtils::AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList) { + typedef typename Xpetra::MultiVector::coordinateType, + LocalOrdinal, GlobalOrdinal, Node> + realvaluedmultivector_type; - for (ParameterList::ConstIterator nonSerialEntry = nonSerialList.begin(); nonSerialEntry != nonSerialList.end(); nonSerialEntry++) { - const std::string& levelName = nonSerialEntry->first; - // Check for match of the form "level X" where X is a positive integer - if (nonSerialList.isSublist(levelName) && levelName.find("level ") == 0 && levelName.size() > 6) { - int levelID = strtol(levelName.substr(6).c_str(), 0, 0); - if (levelID > 0) - { - // Do enough level adding so we can be sure to add the data to the right place - for (int i = H.GetNumLevels(); i <= levelID; i++) - H.AddNewLevel(); - } - RCP level = H.GetLevel(levelID); + for (ParameterList::ConstIterator nonSerialEntry = nonSerialList.begin(); nonSerialEntry != nonSerialList.end(); nonSerialEntry++) { + const std::string& 
levelName = nonSerialEntry->first; + // Check for match of the form "level X" where X is a positive integer + if (nonSerialList.isSublist(levelName) && levelName.find("level ") == 0 && levelName.size() > 6) { + int levelID = strtol(levelName.substr(6).c_str(), 0, 0); + if (levelID > 0) { + // Do enough level adding so we can be sure to add the data to the right place + for (int i = H.GetNumLevels(); i <= levelID; i++) + H.AddNewLevel(); + } + RCP level = H.GetLevel(levelID); - RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); - TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); + RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); + TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); - // Grab the level sublist & loop over parameters - const ParameterList& levelList = nonSerialList.sublist(levelName); - for (ParameterList::ConstIterator levelListEntry = levelList.begin(); levelListEntry != levelList.end(); levelListEntry++) { - const std::string& name = levelListEntry->first; - TEUCHOS_TEST_FOR_EXCEPTION(name != "A" && name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && - name != "D0" && name != "Dk_1" &&name != "Dk_2" && - name != "Mk_one" && name != "Mk_1_one" && name != "M1_beta" && name != "M1_alpha" && - name != "invMk_1_invBeta" && name != "invMk_2_invAlpha" && - name != "M1" && name != "Ms" && name != "M0inv" && - name != "Pnodal" && name != "NodeMatrix" && name != "NodeAggMatrix" && - name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && - name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && - !IsParamMuemexVariable(name), Exceptions::InvalidArgument, - std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: parameter list 
contains unknown data type(") + name + ")"); + // Grab the level sublist & loop over parameters + const ParameterList& levelList = nonSerialList.sublist(levelName); + for (ParameterList::ConstIterator levelListEntry = levelList.begin(); levelListEntry != levelList.end(); levelListEntry++) { + const std::string& name = levelListEntry->first; + TEUCHOS_TEST_FOR_EXCEPTION(name != "A" && name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && + name != "D0" && name != "Dk_1" && name != "Dk_2" && + name != "Mk_one" && name != "Mk_1_one" && name != "M1_beta" && name != "M1_alpha" && + name != "invMk_1_invBeta" && name != "invMk_2_invAlpha" && + name != "M1" && name != "Ms" && name != "M0inv" && + name != "Pnodal" && name != "NodeMatrix" && name != "NodeAggMatrix" && + name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && + name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && + !IsParamMuemexVariable(name), + Exceptions::InvalidArgument, + std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: parameter list contains unknown data type(") + name + ")"); - // Get a valid communicator and lib - RCP > comm; - if (!level->GetComm().is_null()) - comm = level->GetComm(); - else if (level->IsAvailable("A")) { + // Get a valid communicator and lib + RCP> comm; + if (!level->GetComm().is_null()) + comm = level->GetComm(); + else if (level->IsAvailable("A")) { + RCP mat; + level->Get("A", mat); + comm = mat->getMap()->getComm(); + } else { + RCP level0 = H.GetLevel(0); + if (!level0->GetComm().is_null()) + comm = level0->GetComm(); + else { RCP mat; - level->Get("A", mat); + level0->Get("A", mat); comm = mat->getMap()->getComm(); - } else { - RCP level0 = H.GetLevel(0); - if (!level0->GetComm().is_null()) - comm = level0->GetComm(); - else { - RCP mat; - level0->Get("A", mat); - comm = mat->getMap()->getComm(); - } } - Xpetra::UnderlyingLib lib = level->lib(); + } + 
Xpetra::UnderlyingLib lib = level->lib(); + + if (name == "A") { + RCP mat; + if (levelListEntry->second.isType()) + // We might also want to read maps here. + mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); + else + mat = Teuchos::getValue>(levelListEntry->second); + level->Set(name, mat, NoFactory::get()); + M->SetFactory(name, NoFactory::getRCP()); // TAW: not sure about this: be aware that this affects all levels + // However, A is accessible through NoFactory anyway, so it should + // be fine here. + } else if (name == "P" || name == "R" || name == "K" || name == "M") { + if (levelListEntry->second.isType>()) { + RCP mat; + mat = Teuchos::getValue>(levelListEntry->second); + + RCP fact = M->GetFactory(name); + level->AddKeepFlag(name, fact.get(), MueLu::UserData); + level->Set(name, mat, fact.get()); - if (name == "A") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, mat, NoFactory::get()); + } else { RCP mat; if (levelListEntry->second.isType()) // We might also want to read maps here. - mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); + mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); else - mat = Teuchos::getValue > (levelListEntry->second); - level->Set(name, mat, NoFactory::get()); - M->SetFactory(name, NoFactory::getRCP()); // TAW: not sure about this: be aware that this affects all levels - // However, A is accessible through NoFactory anyway, so it should - // be fine here. 
- } - else if(name == "P" || name == "R" || name == "K" || name == "M" ) { - if (levelListEntry->second.isType >()) { - RCP mat; - mat = Teuchos::getValue > (levelListEntry->second); - - RCP fact = M->GetFactory(name); - level->AddKeepFlag(name,fact.get(),MueLu::UserData); - level->Set(name, mat, fact.get()); - - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, mat, NoFactory::get()); - } else { - RCP mat; - if (levelListEntry->second.isType()) - // We might also want to read maps here. - mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); - else - mat = Teuchos::getValue > (levelListEntry->second); + mat = Teuchos::getValue>(levelListEntry->second); - RCP fact = M->GetFactory(name); - level->AddKeepFlag(name,fact.get(),MueLu::UserData); - level->Set(name, mat, fact.get()); + RCP fact = M->GetFactory(name); + level->AddKeepFlag(name, fact.get(), MueLu::UserData); + level->Set(name, mat, fact.get()); - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, mat, NoFactory::get()); - } + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, mat, NoFactory::get()); } - else if (name == "D0" || name == "Dk_1" ||name == "Dk_2" || + } else if (name == "D0" || name == "Dk_1" || name == "Dk_2" || name == "Mk_one" || name == "Mk_1_one" || name == "M1_beta" || name == "M1_alpha" || name == "invMk_1_invBeta" || name == "invMk_2_invAlpha" || name == "M1" || name == "Ms" || name == "M0inv" || name == "Pnodal" || name == "NodeMatrix" || name == "NodeAggMatrix") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - if (levelListEntry->second.isType >()) - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - else - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - } - else if (name == "Mdiag") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > 
(levelListEntry->second), NoFactory::get()); - } - else if (name == "Nullspace") - { - RCP vec; - if (levelListEntry->second.isType()) { - TEUCHOS_ASSERT(level->IsAvailable("A")); - RCP mat; - level->Get("A", mat); - auto map = mat->getMap(); - vec = Xpetra::IO::ReadMultiVector(Teuchos::getValue(levelListEntry->second), map); - } else - vec = Teuchos::getValue > (levelListEntry->second); - level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); - level->Set(name, vec, NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - // One should do this only in very special cases - } - else if(name == "Coordinates") //Scalar of Coordinates MV is always double - { - RCP vec; - if (levelListEntry->second.isType()) { - TEUCHOS_ASSERT(level->IsAvailable("A")); - RCP mat; - level->Get("A", mat); - size_t blkSize = mat->GetFixedBlockSize(); - RCP nodeMap = mat->getRowMap(); - if (blkSize > 1) { - // Create a nodal map, as coordinates have not been expanded to a DOF map yet. 
- RCP dofMap = mat->getRowMap(); - GO indexBase = dofMap->getIndexBase(); - size_t numLocalDOFs = dofMap->getLocalNumElements(); - TEUCHOS_TEST_FOR_EXCEPTION(numLocalDOFs % blkSize, Exceptions::RuntimeError, - "HierarchyUtils: block size (" << blkSize << ") is incompatible with the number of local dofs in a row map (" << numLocalDOFs); - ArrayView GIDs = dofMap->getLocalElementList(); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + if (levelListEntry->second.isType>()) + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + } else if (name == "Mdiag") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + } else if (name == "Nullspace") { + RCP vec; + if (levelListEntry->second.isType()) { + TEUCHOS_ASSERT(level->IsAvailable("A")); + RCP mat; + level->Get("A", mat); + auto map = mat->getMap(); + vec = Xpetra::IO::ReadMultiVector(Teuchos::getValue(levelListEntry->second), map); + } else + vec = Teuchos::getValue>(levelListEntry->second); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, vec, NoFactory::get()); + // M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here + // One should do this only in very special cases + } else if (name == "Coordinates") // Scalar of Coordinates MV is always double + { + RCP vec; + if (levelListEntry->second.isType()) { + TEUCHOS_ASSERT(level->IsAvailable("A")); + RCP mat; + level->Get("A", mat); + size_t blkSize = mat->GetFixedBlockSize(); + RCP nodeMap = mat->getRowMap(); + if (blkSize > 1) { + // Create a nodal map, as coordinates have not been expanded to a DOF map yet. 
+ RCP dofMap = mat->getRowMap(); + GO indexBase = dofMap->getIndexBase(); + size_t numLocalDOFs = dofMap->getLocalNumElements(); + TEUCHOS_TEST_FOR_EXCEPTION(numLocalDOFs % blkSize, Exceptions::RuntimeError, + "HierarchyUtils: block size (" << blkSize << ") is incompatible with the number of local dofs in a row map (" << numLocalDOFs); + ArrayView GIDs = dofMap->getLocalElementList(); - Array nodeGIDs(numLocalDOFs/blkSize); - for (size_t i = 0; i < numLocalDOFs; i += blkSize) - nodeGIDs[i/blkSize] = (GIDs[i] - indexBase)/blkSize + indexBase; + Array nodeGIDs(numLocalDOFs / blkSize); + for (size_t i = 0; i < numLocalDOFs; i += blkSize) + nodeGIDs[i / blkSize] = (GIDs[i] - indexBase) / blkSize + indexBase; - Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); - nodeMap = MapFactory::Build(dofMap->lib(), INVALID, nodeGIDs(), indexBase, dofMap->getComm()); - } - vec = Xpetra::IO::coordinateType,LocalOrdinal,GlobalOrdinal,Node>::ReadMultiVector(Teuchos::getValue(levelListEntry->second), nodeMap); - } else - vec = Teuchos::getValue > (levelListEntry->second); - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, vec, NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - } - else if(name == "Node Comm") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - } - else if(name == "DualNodeID2PrimalNodeID") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); - } - else if(name == "Primal interface DOF map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); - } + Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); + nodeMap = 
MapFactory::Build(dofMap->lib(), INVALID, nodeGIDs(), indexBase, dofMap->getComm()); + } + vec = Xpetra::IO::coordinateType, LocalOrdinal, GlobalOrdinal, Node>::ReadMultiVector(Teuchos::getValue(levelListEntry->second), nodeMap); + } else + vec = Teuchos::getValue>(levelListEntry->second); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, vec, NoFactory::get()); + // M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here + } else if (name == "Node Comm") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + } else if (name == "DualNodeID2PrimalNodeID") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + } else if (name == "Primal interface DOF map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + } #ifdef HAVE_MUELU_INTREPID2 - else if (name == "pcoarsen: element to node map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - } + else if (name == "pcoarsen: element to node map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + } #endif - else + else #ifdef HAVE_MUELU_MATLAB - { - //Custom variable for Muemex - size_t typeNameStart = name.find_first_not_of(' '); - size_t typeNameEnd = name.find(' ', typeNameStart); - std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); - std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); - level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); - if(typeName == "matrix") - level->Set(name, 
Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "multivector") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "map") - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - else if(typeName == "ordinalvector") - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - else if(typeName == "scalar") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "double") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "complex") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "int") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "string") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - } + { + // Custom variable for Muemex + size_t typeNameStart = name.find_first_not_of(' '); + size_t typeNameEnd = name.find(' ', typeNameStart); + std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); + std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + if (typeName == "matrix") + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else if (typeName == "multivector") + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else if (typeName == "map") + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + else if (typeName == "ordinalvector") + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + else if (typeName == "scalar") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + else if (typeName == "double") + level->Set(name, 
Teuchos::getValue(levelListEntry->second), NoFactory::get()); + else if (typeName == "complex") + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else if (typeName == "int") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + else if (typeName == "string") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + } #else - { - throw std::runtime_error("Invalid non-serializable data on list"); - } -#endif + { + throw std::runtime_error("Invalid non-serializable data on list"); } - } else if (nonSerialList.isSublist(levelName) && levelName.find("user data") != std::string::npos) { - // So far only put data on level 0 - int levelID = 0; - RCP level = H.GetLevel(levelID); +#endif + } + } else if (nonSerialList.isSublist(levelName) && levelName.find("user data") != std::string::npos) { + // So far only put data on level 0 + int levelID = 0; + RCP level = H.GetLevel(levelID); - RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); - TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); + RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); + TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); - // Grab the user data sublist & loop over parameters - const ParameterList& userList = nonSerialList.sublist(levelName); - for (ParameterList::ConstIterator userListEntry = userList.begin(); userListEntry != userList.end(); userListEntry++) { - const std::string& name = userListEntry->first; - TEUCHOS_TEST_FOR_EXCEPTION(name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && - name != "D0" && name != "Dk_1" &&name != "Dk_2" && - name != "Mk_one" && name != "Mk_1_one" && name != "M1_beta" && name != "M1_alpha" && - name != "invMk_1_invBeta" && name != 
"invMk_2_invAlpha" && - name != "M1" && name != "Ms" && name != "M0inv" && - name != "NodeMatrix" && - name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && - name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && - name != "output stream" && - !IsParamValidVariable(name), Exceptions::InvalidArgument, - std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: user data parameter list contains unknown data type (") + name + ")"); - if( name == "P" || name == "R" || name == "K" || name == "M" || - name == "D0" || name == "Dk_1" ||name == "Dk_2" || - name == "Mk_one" || name == "Mk_1_one" || name == "M1_beta" || name == "M1_alpha" || - name == "invMk_1_invBeta" || name == "invMk_2_invAlpha" || - name == "M1" || name == "Ms" || name == "M0inv" || - name == "NodeMatrix" ) { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > (userListEntry->second), NoFactory::get()); - } else if (name == "Mdiag") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - } else if (name == "Nullspace") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - // One should do this only in very special cases - } else if(name == "Coordinates") {//Scalar of Coordinates MV is always double - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - } - else if(name == "Node Comm") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - } - else if(name == "DualNodeID2PrimalNodeID") - { - 
level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); - } - else if(name == "Primal interface DOF map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); - } + // Grab the user data sublist & loop over parameters + const ParameterList& userList = nonSerialList.sublist(levelName); + for (ParameterList::ConstIterator userListEntry = userList.begin(); userListEntry != userList.end(); userListEntry++) { + const std::string& name = userListEntry->first; + TEUCHOS_TEST_FOR_EXCEPTION(name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && + name != "D0" && name != "Dk_1" && name != "Dk_2" && + name != "Mk_one" && name != "Mk_1_one" && name != "M1_beta" && name != "M1_alpha" && + name != "invMk_1_invBeta" && name != "invMk_2_invAlpha" && + name != "M1" && name != "Ms" && name != "M0inv" && + name != "NodeMatrix" && + name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && + name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && + name != "output stream" && + !IsParamValidVariable(name), + Exceptions::InvalidArgument, + std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: user data parameter list contains unknown data type (") + name + ")"); + if (name == "P" || name == "R" || name == "K" || name == "M" || + name == "D0" || name == "Dk_1" || name == "Dk_2" || + name == "Mk_one" || name == "Mk_1_one" || name == "M1_beta" || name == "M1_alpha" || + name == "invMk_1_invBeta" || name == "invMk_2_invAlpha" || + name == "M1" || name == "Ms" || name == "M0inv" || + name == "NodeMatrix") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } else if (name == "Mdiag") { + level->AddKeepFlag(name, 
NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } else if (name == "Nullspace") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + // M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here + // One should do this only in very special cases + } else if (name == "Coordinates") { // Scalar of Coordinates MV is always double + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } else if (name == "Node Comm") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + } else if (name == "DualNodeID2PrimalNodeID") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + } else if (name == "Primal interface DOF map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } #ifdef HAVE_MUELU_INTREPID2 - else if (name == "pcoarsen: element to node map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - } + else if (name == "pcoarsen: element to node map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + } #endif - else if (name == "output stream") - { - H.SetMueLuOStream(Teuchos::getValue >(userListEntry->second)); - } - else { - //Custom variable - size_t typeNameStart = name.find_first_not_of(' '); - size_t typeNameEnd = name.find(' ', typeNameStart); - std::string typeName = 
name.substr(typeNameStart, typeNameEnd - typeNameStart); - size_t varNameStart = name.find_first_not_of(' ', typeNameEnd); - std::string varName = name.substr(varNameStart, name.size()); - std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); - level->AddKeepFlag(varName, NoFactory::get(), MueLu::UserData); - if(typeName == "matrix") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "multivector") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "vector") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "map") - level->Set(varName, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - else if(typeName == "ordinalvector") - level->Set(varName, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - else if(typeName == "scalar") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "double") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "complex") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "int") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "string") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "array") - level->Set(varName, Teuchos::getValue > (userListEntry->second), NoFactory::get()); - else if(typeName == "array") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "arrayrcp") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "arrayrcp") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else - throw 
std::runtime_error("Invalid non-serializable data on list"); - } + else if (name == "output stream") { + H.SetMueLuOStream(Teuchos::getValue>(userListEntry->second)); + } else { + // Custom variable + size_t typeNameStart = name.find_first_not_of(' '); + size_t typeNameEnd = name.find(' ', typeNameStart); + std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); + size_t varNameStart = name.find_first_not_of(' ', typeNameEnd); + std::string varName = name.substr(varNameStart, name.size()); + std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); + level->AddKeepFlag(varName, NoFactory::get(), MueLu::UserData); + if (typeName == "matrix") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "multivector") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "vector") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "map") + level->Set(varName, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + else if (typeName == "ordinalvector") + level->Set(varName, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + else if (typeName == "scalar") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "double") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "complex") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "int") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "string") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "array") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "array") + 
level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "arrayrcp") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "arrayrcp") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else + throw std::runtime_error("Invalid non-serializable data on list"); } - // level->print(std::cout, MueLu::Debug); } + // level->print(std::cout, MueLu::Debug); } } -} // namespace MueLu +} +} // namespace MueLu #define MUELU_HIERARCHY_UTILS_SHORT -#endif // MUELU_HIERARCHYHELPERS_DEF_HPP +#endif // MUELU_HIERARCHYHELPERS_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp b/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp index 4c345a4e3c49..22b0f8f41843 100644 --- a/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp @@ -63,7 +63,7 @@ #include "MueLu_Types.hpp" #include "MueLu_FactoryBase_fwd.hpp" -#include "MueLu_FactoryManager.hpp" // no fwd declaration because constructor of FactoryManager is used as a default parameter of Setup() +#include "MueLu_FactoryManager.hpp" // no fwd declaration because constructor of FactoryManager is used as a default parameter of Setup() #include "MueLu_KeepType.hpp" #include "MueLu_Level_fwd.hpp" #include "MueLu_MasterList.hpp" @@ -76,372 +76,377 @@ namespace MueLu { - enum class ConvergenceStatus { - Converged, - Unconverged, - Undefined - }; - - /*! - @class Hierarchy - @brief Provides methods to build a multigrid hierarchy and apply multigrid cycles. - - Allows users to manually populate operators at different levels within - a multigrid method and push them into the hierarchy via SetLevel() - and/or to supply factories for automatically generating prolongators, - restrictors, and coarse level discretizations. Additionally, this class contains - an apply method that supports V and W cycles. 
- */ - template - class Hierarchy : public BaseClass { +enum class ConvergenceStatus { + Converged, + Unconverged, + Undefined +}; + +/*! + @class Hierarchy + @brief Provides methods to build a multigrid hierarchy and apply multigrid cycles. + + Allows users to manually populate operators at different levels within + a multigrid method and push them into the hierarchy via SetLevel() + and/or to supply factories for automatically generating prolongators, + restrictors, and coarse level discretizations. Additionally, this class contains + an apply method that supports V and W cycles. +*/ +template +class Hierarchy : public BaseClass { #undef MUELU_HIERARCHY_SHORT #include "MueLu_UseShortNames.hpp" - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType MagnitudeType; - - //! Data struct for defining stopping criteria of multigrid iteration - struct ConvData { - ConvData() : maxIts_(1), tol_(-STS::magnitude(STS::one())) { } - ConvData(LO maxIts) : maxIts_(maxIts), tol_(-STS::magnitude(STS::one())) { } - ConvData(MagnitudeType tol) : maxIts_(10000), tol_(tol) { } - ConvData(std::pair p) : maxIts_(p.first), tol_(p.second) { } - - LO maxIts_; - MagnitudeType tol_; - }; - - public: - - //! @name Constructors/Destructors - //@{ - - //! Default constructor. - Hierarchy(); - //! Constructor that labels the hierarchy. - Hierarchy(const std::string& label); - - //! Constructor - Hierarchy(const RCP & A); - - //! Constructor - Hierarchy(const RCP & A, const std::string& label); - - //! Destructor. - virtual ~Hierarchy() { } - - //@} - - //! @name Set/Get Methods. - //@{ - - //! - static CycleType GetDefaultCycle() { return MasterList::getDefault("cycle type") == "V" ? 
VCYCLE : WCYCLE; } - static int GetDefaultCycleStartLevel() { return MasterList::getDefault("W cycle start level"); } - static bool GetDefaultImplicitTranspose() { return MasterList::getDefault("transpose: use implicit"); } - static bool GetDefaultFuseProlongationAndUpdate() { return MasterList::getDefault("fuse prolongation and update"); } - static Xpetra::global_size_t GetDefaultMaxCoarseSize() { return MasterList::getDefault("coarse: max size"); } - static int GetDefaultMaxLevels() { return MasterList::getDefault("max levels"); } - static bool GetDefaultPRrebalance() { return MasterList::getDefault("repartition: rebalance P and R"); } - - Xpetra::global_size_t GetMaxCoarseSize() const { return maxCoarseSize_; } - bool GetImplicitTranspose() const { return implicitTranspose_; } - bool GetFuseProlongationAndUpdate() const { return fuseProlongationAndUpdate_; } - - void SetMaxCoarseSize(Xpetra::global_size_t maxCoarseSize) { maxCoarseSize_ = maxCoarseSize; } - void SetPRrebalance(bool doPRrebalance) { doPRrebalance_ = doPRrebalance; } - void SetPRViaCopyrebalance(bool doPRViaCopyrebalance) { doPRViaCopyrebalance_ = doPRViaCopyrebalance; } - void SetImplicitTranspose(const bool& implicit) { implicitTranspose_ = implicit; } - void SetFuseProlongationAndUpdate(const bool& fuse) { fuseProlongationAndUpdate_ = fuse; } - - //@} - - //! - - template - friend class Hierarchy; - - private: - int LastLevelID() const { return Levels_.size() - 1; } - void DumpCurrentGraph(int level) const; - - public: - - //! Add a level at the end of the hierarchy - void AddLevel(const RCP & level); - - //! Add a new level at the end of the hierarchy - void AddNewLevel(); - - //! Retrieve a certain level from hierarchy. - RCP & GetLevel(const int levelID = 0); - - int GetNumLevels() const; - int GetGlobalNumLevels() const; - - MagnitudeType GetRate() const { return rate_; } + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MagnitudeType; + + //! 
Data struct for defining stopping criteria of multigrid iteration + struct ConvData { + ConvData() + : maxIts_(1) + , tol_(-STS::magnitude(STS::one())) {} + ConvData(LO maxIts) + : maxIts_(maxIts) + , tol_(-STS::magnitude(STS::one())) {} + ConvData(MagnitudeType tol) + : maxIts_(10000) + , tol_(tol) {} + ConvData(std::pair p) + : maxIts_(p.first) + , tol_(p.second) {} + + LO maxIts_; + MagnitudeType tol_; + }; - // This function is global - double GetOperatorComplexity() const; + public: + //! @name Constructors/Destructors + //@{ - // This function is global - double GetSmootherComplexity() const; + //! Default constructor. + Hierarchy(); + //! Constructor that labels the hierarchy. + Hierarchy(const std::string& label); - //! Helper function - void CheckLevel(Level& level, int levelID); + //! Constructor + Hierarchy(const RCP& A); + + //! Constructor + Hierarchy(const RCP& A, const std::string& label); + + //! Destructor. + virtual ~Hierarchy() {} + + //@} + + //! @name Set/Get Methods. + //@{ + + //! + static CycleType GetDefaultCycle() { return MasterList::getDefault("cycle type") == "V" ? 
VCYCLE : WCYCLE; } + static int GetDefaultCycleStartLevel() { return MasterList::getDefault("W cycle start level"); } + static bool GetDefaultImplicitTranspose() { return MasterList::getDefault("transpose: use implicit"); } + static bool GetDefaultFuseProlongationAndUpdate() { return MasterList::getDefault("fuse prolongation and update"); } + static Xpetra::global_size_t GetDefaultMaxCoarseSize() { return MasterList::getDefault("coarse: max size"); } + static int GetDefaultMaxLevels() { return MasterList::getDefault("max levels"); } + static bool GetDefaultPRrebalance() { return MasterList::getDefault("repartition: rebalance P and R"); } + + Xpetra::global_size_t GetMaxCoarseSize() const { return maxCoarseSize_; } + bool GetImplicitTranspose() const { return implicitTranspose_; } + bool GetFuseProlongationAndUpdate() const { return fuseProlongationAndUpdate_; } + + void SetMaxCoarseSize(Xpetra::global_size_t maxCoarseSize) { maxCoarseSize_ = maxCoarseSize; } + void SetPRrebalance(bool doPRrebalance) { doPRrebalance_ = doPRrebalance; } + void SetPRViaCopyrebalance(bool doPRViaCopyrebalance) { doPRViaCopyrebalance_ = doPRViaCopyrebalance; } + void SetImplicitTranspose(const bool& implicit) { implicitTranspose_ = implicit; } + void SetFuseProlongationAndUpdate(const bool& fuse) { fuseProlongationAndUpdate_ = fuse; } + + //@} + + //! + + template + friend class Hierarchy; + + private: + int LastLevelID() const { return Levels_.size() - 1; } + void DumpCurrentGraph(int level) const; + + public: + //! Add a level at the end of the hierarchy + void AddLevel(const RCP& level); + + //! Add a new level at the end of the hierarchy + void AddNewLevel(); + + //! Retrieve a certain level from hierarchy. 
+ RCP& GetLevel(const int levelID = 0); + + int GetNumLevels() const; + int GetGlobalNumLevels() const; + + MagnitudeType GetRate() const { return rate_; } + + // This function is global + double GetOperatorComplexity() const; + + // This function is global + double GetSmootherComplexity() const; + + //! Helper function + void CheckLevel(Level& level, int levelID); + + void SetMatvecParams(RCP matvecParams); + + //! Multi-level setup phase: build a new level of the hierarchy. + /*! This method is aimed to be used in a loop building the hierarchy level by level. See Hierarchy::Setup(manager, startLevel, numDesiredLevels) for an example of usage. + * + * @param coarseLevelID ID of the level to be built. + * @param fineLevelManager defines how to build missing data of the fineLevel (example: aggregates) + * @param coarseLevelManager defines how to build the level + * @param nextLevelManager defines how the next coarse level will be built. This is used to post corresponding request before building the coarse level to keep useful data. + + CoarseLevel is considered to be the last level if: + - input parameter isLastLevel == true + or + - Ac->getRowMap()->getGlobalNumElements() <= maxCoarseSize_ + Method return true if CoarseLevel is the last level. + + Pre-condition: + * FineLevel: + - must have kept useful data (TODO: not tested yet) + - must be Teuchos::null when Setup is called for finest level (Setup then automatically calls Request for "Smoother" and "CoarseSolver") + * CoarseLevel: + - already allocated (using Hierarchy::AddLevel()) + - requests already posted + (exception: for finest level (=fineLevelManager==null) requests are called within setup routine) + * NextLevel: + - do not need to be allocate but could (FIXME: will be deleted if lastlevel...). 
+ - should be null when Setup is called for last level + + Post-condition: + * FineLevel: + - temporary data have been used and released (this condition is not tested) + * CoarseLevel: + - built, requests have been used + - if it is the last level (due to input parameter isLastLevel or getGlobalNumElements() <= maxCoarseSize_), + then the coarse solver factory of the factory manager have been used instead of the smoother factory. + * NextLevel: + If input parameter isLastLevel == false: + - have been allocated + - requests already posted. + */ + bool Setup(int coarseLevelID, const RCP fineLevelManager /* = Teuchos::null */, const RCP coarseLevelManager, + const RCP nextLevelManager = Teuchos::null); - void SetMatvecParams(RCP matvecParams); + //! + void Setup(const FactoryManagerBase& manager = FactoryManager(), int startLevel = 0, int numDesiredLevels = GetDefaultMaxLevels()); - //! Multi-level setup phase: build a new level of the hierarchy. - /*! This method is aimed to be used in a loop building the hierarchy level by level. See Hierarchy::Setup(manager, startLevel, numDesiredLevels) for an example of usage. - * - * @param coarseLevelID ID of the level to be built. - * @param fineLevelManager defines how to build missing data of the fineLevel (example: aggregates) - * @param coarseLevelManager defines how to build the level - * @param nextLevelManager defines how the next coarse level will be built. This is used to post corresponding request before building the coarse level to keep useful data. + void SetupRe(); - CoarseLevel is considered to be the last level if: - - input parameter isLastLevel == true - or - - Ac->getRowMap()->getGlobalNumElements() <= maxCoarseSize_ - Method return true if CoarseLevel is the last level. + //! 
Clear impermanent data from previous setup + void Clear(int startLevel = 0); + void ExpertClear(); - Pre-condition: - * FineLevel: - - must have kept useful data (TODO: not tested yet) - - must be Teuchos::null when Setup is called for finest level (Setup then automatically calls Request for "Smoother" and "CoarseSolver") - * CoarseLevel: - - already allocated (using Hierarchy::AddLevel()) - - requests already posted - (exception: for finest level (=fineLevelManager==null) requests are called within setup routine) - * NextLevel: - - do not need to be allocate but could (FIXME: will be deleted if lastlevel...). - - should be null when Setup is called for last level + //! Returns multigrid cycle type (supports VCYCLE and WCYCLE) + CycleType GetCycle() const { return Cycle_; } - Post-condition: - * FineLevel: - - temporary data have been used and released (this condition is not tested) - * CoarseLevel: - - built, requests have been used - - if it is the last level (due to input parameter isLastLevel or getGlobalNumElements() <= maxCoarseSize_), - then the coarse solver factory of the factory manager have been used instead of the smoother factory. - * NextLevel: - If input parameter isLastLevel == false: - - have been allocated - - requests already posted. - */ - bool Setup(int coarseLevelID, const RCP fineLevelManager /* = Teuchos::null */, const RCP coarseLevelManager, - const RCP nextLevelManager = Teuchos::null); + //! Supports VCYCLE and WCYCLE types. + void SetCycle(CycleType Cycle) { Cycle_ = Cycle; } - //! - void Setup(const FactoryManagerBase& manager = FactoryManager(), int startLevel = 0, int numDesiredLevels = GetDefaultMaxLevels()); + void SetCycleStartLevel(int cycleStart) { WCycleStartLevel_ = cycleStart; } - void SetupRe(); + //! Specify damping factor alpha such that x = x + alpha*P*c, where c is the coarse grid correction. + void SetProlongatorScalingFactor(double scalingFactor) { scalingFactor_ = scalingFactor; } - //! 
Clear impermanent data from previous setup - void Clear(int startLevel = 0); - void ExpertClear(); + /*! + @brief Apply the multigrid preconditioner. - //! Returns multigrid cycle type (supports VCYCLE and WCYCLE) - CycleType GetCycle() const { return Cycle_; } + In theory, more general cycle types than just V- and W-cycles are possible. However, + the enumerated type CycleType would have to be extended. - //! Supports VCYCLE and WCYCLE types. - void SetCycle(CycleType Cycle) { Cycle_ = Cycle; } + @param B right-hand side of linear problem + @param X initial and final (approximate) solution of linear problem + @param ConvData struct which stores convergence criteria (maximum number of multigrid iterations or stopping tolerance) + @param InitialGuessIsZero Indicates whether the initial guess is zero + @param startLevel index of starting level to build multigrid hierarchy (default = 0) + */ + ConvergenceStatus Iterate(const MultiVector& B, MultiVector& X, ConvData conv = ConvData(), + bool InitialGuessIsZero = false, LO startLevel = 0); - void SetCycleStartLevel(int cycleStart) { WCycleStartLevel_ = cycleStart; } + /*! + @brief Print matrices in the multigrid hierarchy to file. - //! Specify damping factor alpha such that x = x + alpha*P*c, where c is the coarse grid correction. - void SetProlongatorScalingFactor(double scalingFactor) { scalingFactor_ = scalingFactor; } + @param[in] start start level + @param[in] end end level - /*! - @brief Apply the multigrid preconditioner. + Default behavior is to print system and transfer matrices from the entire hierarchy. + Files are named "A_0.m", "P_1.m", "R_1.m", etc, and are in matrix market coordinate format. + */ + void Write(const LO& start = -1, const LO& end = -1, const std::string& suffix = ""); - In theory, more general cycle types than just V- and W-cycles are possible. However, - the enumerated type CycleType would have to be extended. 
+ //@} - @param B right-hand side of linear problem - @param X initial and final (approximate) solution of linear problem - @param ConvData struct which stores convergence criteria (maximum number of multigrid iterations or stopping tolerance) - @param InitialGuessIsZero Indicates whether the initial guess is zero - @param startLevel index of starting level to build multigrid hierarchy (default = 0) - */ - ConvergenceStatus Iterate(const MultiVector& B, MultiVector& X, ConvData conv = ConvData(), - bool InitialGuessIsZero = false, LO startLevel = 0); + //! @name Permanent storage + //@{ - /*! - @brief Print matrices in the multigrid hierarchy to file. + //! Call Level::Keep(ename, factory) for each level of the Hierarchy. + void Keep(const std::string& ename, const FactoryBase* factory = NoFactory::get()); - @param[in] start start level - @param[in] end end level + //! Call Level::Delete(ename, factory) for each level of the Hierarchy. + void Delete(const std::string& ename, const FactoryBase* factory = NoFactory::get()); - Default behavior is to print system and transfer matrices from the entire hierarchy. - Files are named "A_0.m", "P_1.m", "R_1.m", etc, and are in matrix market coordinate format. - */ - void Write(const LO &start=-1, const LO &end=-1, const std::string &suffix=""); + //! Call Level::AddKeepFlag for each level of the Hierarchy. + void AddKeepFlag(const std::string& ename, const FactoryBase* factory = NoFactory::get(), KeepType keep = MueLu::Keep); - //@} + //! Call Level::RemoveKeepFlag for each level of the Hierarchy + void RemoveKeepFlag(const std::string& ename, const FactoryBase* factory, KeepType keep = MueLu::All); - //! @name Permanent storage - //@{ + //@} - //! Call Level::Keep(ename, factory) for each level of the Hierarchy. - void Keep(const std::string & ename, const FactoryBase* factory = NoFactory::get()); - - //! Call Level::Delete(ename, factory) for each level of the Hierarchy. 
- void Delete(const std::string& ename, const FactoryBase* factory = NoFactory::get()); - - //! Call Level::AddKeepFlag for each level of the Hierarchy. - void AddKeepFlag(const std::string & ename, const FactoryBase* factory = NoFactory::get(), KeepType keep = MueLu::Keep); - - //! Call Level::RemoveKeepFlag for each level of the Hierarchy - void RemoveKeepFlag(const std::string & ename, const FactoryBase* factory, KeepType keep = MueLu::All); - - //@} - - //! @name Overridden from Teuchos::Describable - //@{ - - //! Return a simple one-line description of this object. - std::string description() const; + //! @name Overridden from Teuchos::Describable + //@{ - /*! @brief Print the Hierarchy with some verbosity level to a FancyOStream object. + //! Return a simple one-line description of this object. + std::string description() const; - @param[in] out The Teuchos::FancyOstream. - @param[in] verbLevel Controls amount of output. - */ - void describe(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_HIGH) const; + /*! @brief Print the Hierarchy with some verbosity level to a FancyOStream object. - //! Hierarchy::print is local hierarchy function, thus the statistics can be different from global ones - void print(std::ostream& out = std::cout, const VerbLevel verbLevel = (MueLu::Parameters | MueLu::Statistics0)) const; + @param[in] out The Teuchos::FancyOstream. + @param[in] verbLevel Controls amount of output. + */ + void describe(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_HIGH) const; - /*! Indicate whether the multigrid method is a preconditioner or a solver. + //! 
Hierarchy::print is local hierarchy function, thus the statistics can be different from global ones + void print(std::ostream& out = std::cout, const VerbLevel verbLevel = (MueLu::Parameters | MueLu::Statistics0)) const; - This is used in conjunction with the verbosity level to determine whether the residuals can be printed. - */ - void IsPreconditioner(const bool flag); + /*! Indicate whether the multigrid method is a preconditioner or a solver. - //@} + This is used in conjunction with the verbosity level to determine whether the residuals can be printed. + */ + void IsPreconditioner(const bool flag); - void EnableGraphDumping(const std::string& filename, int levelID = 1) { - isDumpingEnabled_ = true; - dumpLevel_ = levelID; - dumpFile_ = filename; - } + //@} - void setlib(Xpetra::UnderlyingLib inlib) { lib_ = inlib; } - Xpetra::UnderlyingLib lib() { return lib_; } + void EnableGraphDumping(const std::string& filename, int levelID = 1) { + isDumpingEnabled_ = true; + dumpLevel_ = levelID; + dumpFile_ = filename; + } - //! force recreation of cached description_ next time description() is called: - void ResetDescription() { - description_ = ""; - } + void setlib(Xpetra::UnderlyingLib inlib) { lib_ = inlib; } + Xpetra::UnderlyingLib lib() { return lib_; } - void AllocateLevelMultiVectors(int numvecs, bool forceMapCheck=false); - void DeleteLevelMultiVectors(); + //! force recreation of cached description_ next time description() is called: + void ResetDescription() { + description_ = ""; + } - protected: - const RCP& GetLevelManager(const int levelID) const { - return levelManagers_[levelID]; - } + void AllocateLevelMultiVectors(int numvecs, bool forceMapCheck = false); + void DeleteLevelMultiVectors(); - private: - //! Copy constructor is not implemented. - Hierarchy(const Hierarchy &h); + protected: + const RCP& GetLevelManager(const int levelID) const { + return levelManagers_[levelID]; + } - //! 
Decide if the residual needs to be computed - bool IsCalculationOfResidualRequired(const LO startLevel, const ConvData& conv) const; + private: + //! Copy constructor is not implemented. + Hierarchy(const Hierarchy& h); - /*! - \brief Decide if the multigrid iteration is converged + //! Decide if the residual needs to be computed + bool IsCalculationOfResidualRequired(const LO startLevel, const ConvData& conv) const; - We judge convergence by comparing the current \c residualNorm - to the user given \c convergenceTolerance and then return the - appropriate \c ConvergenceStatus - */ - ConvergenceStatus IsConverged(const Teuchos::Array& residualNorm, - const MagnitudeType convergenceTolerance) const; + /*! + \brief Decide if the multigrid iteration is converged - //! Print \c residualNorm for this \c iteration to the screen - void PrintResidualHistory(const LO iteration, - const Teuchos::Array& residualNorm) const; + We judge convergence by comparing the current \c residualNorm + to the user given \c convergenceTolerance and then return the + appropriate \c ConvergenceStatus + */ + ConvergenceStatus IsConverged(const Teuchos::Array& residualNorm, + const MagnitudeType convergenceTolerance) const; - //! Compute the residual norm and print it depending on the verbosity level - ConvergenceStatus ComputeResidualAndPrintHistory(const Operator& A, const MultiVector& X, - const MultiVector& B, const LO iteration, - const LO startLevel, const ConvData& conv, MagnitudeType& previousResidualNorm); + //! Print \c residualNorm for this \c iteration to the screen + void PrintResidualHistory(const LO iteration, + const Teuchos::Array& residualNorm) const; - //! Container for Level objects - Array > Levels_; + //! 
Compute the residual norm and print it depending on the verbosity level + ConvergenceStatus ComputeResidualAndPrintHistory(const Operator& A, const MultiVector& X, + const MultiVector& B, const LO iteration, + const LO startLevel, const ConvData& conv, MagnitudeType& previousResidualNorm); - //! We replace coordinates GIDs to make them consistent with matrix GIDs, - //! even if user does not do that. Ideally, though, we should completely - //! remove any notion of coordinate GIDs, and deal only with LIDs, assuming - //! that they are consistent with matrix block IDs - void ReplaceCoordinateMap(Level& level); + //! Container for Level objects + Array > Levels_; - //! Minimum size of a matrix on any level. If we fall below that, we stop - //! the coarsening - Xpetra::global_size_t maxCoarseSize_; + //! We replace coordinates GIDs to make them consistent with matrix GIDs, + //! even if user does not do that. Ideally, though, we should completely + //! remove any notion of coordinate GIDs, and deal only with LIDs, assuming + //! that they are consistent with matrix block IDs + void ReplaceCoordinateMap(Level& level); - //! Potential speed up of the setup by skipping R construction, and using - //! transpose matrix-matrix product for RAP - bool implicitTranspose_; + //! Minimum size of a matrix on any level. If we fall below that, we stop + //! the coarsening + Xpetra::global_size_t maxCoarseSize_; - //! Potential speed up of the solve by fusing prolongation and update steps. - //! This can lead to more iterations to round-off error accumulation. - bool fuseProlongationAndUpdate_; + //! Potential speed up of the setup by skipping R construction, and using + //! transpose matrix-matrix product for RAP + bool implicitTranspose_; - //! Potential speed up of the setup by skipping rebalancing of P and R, and - //! doing extra import during solve - bool doPRrebalance_; - bool doPRViaCopyrebalance_; // fully explicit, needed for CombinePFactory + //! 
Potential speed up of the solve by fusing prolongation and update steps. + //! This can lead to more iterations to round-off error accumulation. + bool fuseProlongationAndUpdate_; - //! Hierarchy may be used in a standalone mode, or as a preconditioner - bool isPreconditioner_; + //! Potential speed up of the setup by skipping rebalancing of P and R, and + //! doing extra import during solve + bool doPRrebalance_; + bool doPRViaCopyrebalance_; // fully explicit, needed for CombinePFactory - //! V- or W-cycle - CycleType Cycle_; + //! Hierarchy may be used in a standalone mode, or as a preconditioner + bool isPreconditioner_; - //! Level at which to start W-cycle - int WCycleStartLevel_; + //! V- or W-cycle + CycleType Cycle_; - //! Scaling factor to be applied to coarse grid correction. - double scalingFactor_; + //! Level at which to start W-cycle + int WCycleStartLevel_; - //! Epetra/Tpetra mode - Xpetra::UnderlyingLib lib_; + //! Scaling factor to be applied to coarse grid correction. + double scalingFactor_; - //! cache description to avoid recreating in each call to description() - use ResetDescription() to force recreation in Setup, SetupRe, etc. - mutable std::string description_ = ""; // mutable so that we can lazily initialize in description(), which is declared const + //! Epetra/Tpetra mode + Xpetra::UnderlyingLib lib_; - /*! - @brief Graph dumping + //! cache description to avoid recreating in each call to description() - use ResetDescription() to force recreation in Setup, SetupRe, etc. + mutable std::string description_ = ""; // mutable so that we can lazily initialize in description(), which is declared const - If enabled, we dump the graph on a specified level into a specified file - */ - bool isDumpingEnabled_; - // -1 = dump all levels, -2 = dump nothing - int dumpLevel_; - std::string dumpFile_; + /*! + @brief Graph dumping - //! 
Convergece rate - MagnitudeType rate_; + If enabled, we dump the graph on a specified level into a specified file + */ + bool isDumpingEnabled_; + // -1 = dump all levels, -2 = dump nothing + int dumpLevel_; + std::string dumpFile_; - //! Level managers used during the Setup - Array > levelManagers_; + //! Convergece rate + MagnitudeType rate_; - //! Caching (Multi)Vectors used in Hierarchy::Iterate() - int sizeOfAllocatedLevelMultiVectors_; - Array > residual_, coarseRhs_, coarseX_, coarseImport_, coarseExport_, correction_; + //! Level managers used during the Setup + Array > levelManagers_; + //! Caching (Multi)Vectors used in Hierarchy::Iterate() + int sizeOfAllocatedLevelMultiVectors_; + Array > residual_, coarseRhs_, coarseX_, coarseImport_, coarseExport_, correction_; - }; //class Hierarchy +}; // class Hierarchy -} //namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHY_SHORT -#endif // MUELU_HIERARCHY_DECL_HPP +#endif // MUELU_HIERARCHY_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp b/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp index 13a40368f1c4..fab4ce803855 100644 --- a/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp @@ -72,895 +72,899 @@ #include "Teuchos_TimeMonitor.hpp" - - namespace MueLu { - template - Hierarchy::Hierarchy() - : maxCoarseSize_(GetDefaultMaxCoarseSize()), implicitTranspose_(GetDefaultImplicitTranspose()), - fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()), - doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), - scalingFactor_(Teuchos::ScalarTraits::one()), lib_(Xpetra::UseTpetra), isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), - sizeOfAllocatedLevelMultiVectors_(0) - { - AddLevel(rcp(new Level)); - } - - template - Hierarchy::Hierarchy(const std::string& label) - : Hierarchy() - { - setObjectLabel(label); - 
Levels_[0]->setObjectLabel(label); - } - - template - Hierarchy::Hierarchy(const RCP& A) - : maxCoarseSize_(GetDefaultMaxCoarseSize()), implicitTranspose_(GetDefaultImplicitTranspose()), - fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()), - doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), - scalingFactor_(Teuchos::ScalarTraits::one()), isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), - sizeOfAllocatedLevelMultiVectors_(0) - { - lib_ = A->getDomainMap()->lib(); - - RCP Finest = rcp(new Level); - AddLevel(Finest); +template +Hierarchy::Hierarchy() + : maxCoarseSize_(GetDefaultMaxCoarseSize()) + , implicitTranspose_(GetDefaultImplicitTranspose()) + , fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()) + , doPRrebalance_(GetDefaultPRrebalance()) + , doPRViaCopyrebalance_(false) + , isPreconditioner_(true) + , Cycle_(GetDefaultCycle()) + , WCycleStartLevel_(0) + , scalingFactor_(Teuchos::ScalarTraits::one()) + , lib_(Xpetra::UseTpetra) + , isDumpingEnabled_(false) + , dumpLevel_(-2) + , rate_(-1) + , sizeOfAllocatedLevelMultiVectors_(0) { + AddLevel(rcp(new Level)); +} - Finest->Set("A", A); - } +template +Hierarchy::Hierarchy(const std::string& label) + : Hierarchy() { + setObjectLabel(label); + Levels_[0]->setObjectLabel(label); +} - template - Hierarchy::Hierarchy(const RCP& A, const std::string& label) - : Hierarchy(A) - { - setObjectLabel(label); - Levels_[0]->setObjectLabel(label); - } +template +Hierarchy::Hierarchy(const RCP& A) + : maxCoarseSize_(GetDefaultMaxCoarseSize()) + , implicitTranspose_(GetDefaultImplicitTranspose()) + , fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()) + , doPRrebalance_(GetDefaultPRrebalance()) + , doPRViaCopyrebalance_(false) + , isPreconditioner_(true) + , Cycle_(GetDefaultCycle()) + , WCycleStartLevel_(0) + , scalingFactor_(Teuchos::ScalarTraits::one()) + , isDumpingEnabled_(false) + , 
dumpLevel_(-2) + , rate_(-1) + , sizeOfAllocatedLevelMultiVectors_(0) { + lib_ = A->getDomainMap()->lib(); + + RCP Finest = rcp(new Level); + AddLevel(Finest); + + Finest->Set("A", A); +} - template - void Hierarchy::AddLevel(const RCP& level) { - int levelID = LastLevelID() + 1; // ID of the inserted level +template +Hierarchy::Hierarchy(const RCP& A, const std::string& label) + : Hierarchy(A) { + setObjectLabel(label); + Levels_[0]->setObjectLabel(label); +} - if (level->GetLevelID() != -1 && (level->GetLevelID() != levelID)) - GetOStream(Warnings1) << "Hierarchy::AddLevel(): Level with ID=" << level->GetLevelID() << - " have been added at the end of the hierarchy\n but its ID have been redefined" << - " because last level ID of the hierarchy was " << LastLevelID() << "." << std::endl; +template +void Hierarchy::AddLevel(const RCP& level) { + int levelID = LastLevelID() + 1; // ID of the inserted level - Levels_.push_back(level); - level->SetLevelID(levelID); - level->setlib(lib_); + if (level->GetLevelID() != -1 && (level->GetLevelID() != levelID)) + GetOStream(Warnings1) << "Hierarchy::AddLevel(): Level with ID=" << level->GetLevelID() << " have been added at the end of the hierarchy\n but its ID have been redefined" + << " because last level ID of the hierarchy was " << LastLevelID() << "." << std::endl; - level->SetPreviousLevel( (levelID == 0) ? Teuchos::null : Levels_[LastLevelID() - 1] ); - level->setObjectLabel(this->getObjectLabel()); - } + Levels_.push_back(level); + level->SetLevelID(levelID); + level->setlib(lib_); - template - void Hierarchy::AddNewLevel() { - RCP newLevel = Levels_[LastLevelID()]->Build(); // new coarse level, using copy constructor - newLevel->setlib(lib_); - this->AddLevel(newLevel); // add to hierarchy - } + level->SetPreviousLevel((levelID == 0) ? 
Teuchos::null : Levels_[LastLevelID() - 1]); + level->setObjectLabel(this->getObjectLabel()); +} - template - RCP & Hierarchy::GetLevel(const int levelID) { - TEUCHOS_TEST_FOR_EXCEPTION(levelID < 0 || levelID > LastLevelID(), Exceptions::RuntimeError, - "MueLu::Hierarchy::GetLevel(): invalid input parameter value: LevelID = " << levelID); - return Levels_[levelID]; - } +template +void Hierarchy::AddNewLevel() { + RCP newLevel = Levels_[LastLevelID()]->Build(); // new coarse level, using copy constructor + newLevel->setlib(lib_); + this->AddLevel(newLevel); // add to hierarchy +} - template - int Hierarchy::GetNumLevels() const { - return Levels_.size(); - } +template +RCP& Hierarchy::GetLevel(const int levelID) { + TEUCHOS_TEST_FOR_EXCEPTION(levelID < 0 || levelID > LastLevelID(), Exceptions::RuntimeError, + "MueLu::Hierarchy::GetLevel(): invalid input parameter value: LevelID = " << levelID); + return Levels_[levelID]; +} - template - int Hierarchy::GetGlobalNumLevels() const { - RCP A = Levels_[0]->template Get >("A"); - RCP > comm = A->getDomainMap()->getComm(); +template +int Hierarchy::GetNumLevels() const { + return Levels_.size(); +} - int numLevels = GetNumLevels(); - int numGlobalLevels; - Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numLevels, Teuchos::ptr(&numGlobalLevels)); +template +int Hierarchy::GetGlobalNumLevels() const { + RCP A = Levels_[0]->template Get >("A"); + RCP > comm = A->getDomainMap()->getComm(); - return numGlobalLevels; - } + int numLevels = GetNumLevels(); + int numGlobalLevels; + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numLevels, Teuchos::ptr(&numGlobalLevels)); - template - double Hierarchy::GetOperatorComplexity() const { - double totalNnz = 0, lev0Nnz = 1; - for (int i = 0; i < GetNumLevels(); ++i) { - TEUCHOS_TEST_FOR_EXCEPTION(!(Levels_[i]->IsAvailable("A")) , Exceptions::RuntimeError, - "Operator complexity cannot be calculated because A is unavailable on level " << i); - RCP A = Levels_[i]->template Get >("A"); - if 
(A.is_null()) - break; + return numGlobalLevels; +} - RCP Am = rcp_dynamic_cast(A); - if (Am.is_null()) { - GetOStream(Warnings0) << "Some level operators are not matrices, operator complexity calculation aborted" << std::endl; - return 0.0; - } +template +double Hierarchy::GetOperatorComplexity() const { + double totalNnz = 0, lev0Nnz = 1; + for (int i = 0; i < GetNumLevels(); ++i) { + TEUCHOS_TEST_FOR_EXCEPTION(!(Levels_[i]->IsAvailable("A")), Exceptions::RuntimeError, + "Operator complexity cannot be calculated because A is unavailable on level " << i); + RCP A = Levels_[i]->template Get >("A"); + if (A.is_null()) + break; - totalNnz += as(Am->getGlobalNumEntries()); - if (i == 0) - lev0Nnz = totalNnz; + RCP Am = rcp_dynamic_cast(A); + if (Am.is_null()) { + GetOStream(Warnings0) << "Some level operators are not matrices, operator complexity calculation aborted" << std::endl; + return 0.0; } - return totalNnz / lev0Nnz; + + totalNnz += as(Am->getGlobalNumEntries()); + if (i == 0) + lev0Nnz = totalNnz; } + return totalNnz / lev0Nnz; +} - template - double Hierarchy::GetSmootherComplexity() const { - double node_sc = 0, global_sc=0; - double a0_nnz =0; - const size_t INVALID = Teuchos::OrdinalTraits::invalid(); - // Get cost of fine matvec - if (GetNumLevels() <= 0) return -1.0; - if (!Levels_[0]->IsAvailable("A")) return -1.0; - - RCP A = Levels_[0]->template Get >("A"); - if (A.is_null()) return -1.0; - RCP Am = rcp_dynamic_cast(A); - if(Am.is_null()) return -1.0; - a0_nnz = as(Am->getGlobalNumEntries()); - - // Get smoother complexity at each level - for (int i = 0; i < GetNumLevels(); ++i) { - size_t level_sc=0; - if(!Levels_[i]->IsAvailable("PreSmoother")) continue; - RCP S = Levels_[i]->template Get >("PreSmoother"); - if (S.is_null()) continue; - level_sc = S->getNodeSmootherComplexity(); - if(level_sc == INVALID) {global_sc=-1.0;break;} - - node_sc += as(level_sc); +template +double Hierarchy::GetSmootherComplexity() const { + double node_sc = 0, global_sc 
= 0; + double a0_nnz = 0; + const size_t INVALID = Teuchos::OrdinalTraits::invalid(); + // Get cost of fine matvec + if (GetNumLevels() <= 0) return -1.0; + if (!Levels_[0]->IsAvailable("A")) return -1.0; + + RCP A = Levels_[0]->template Get >("A"); + if (A.is_null()) return -1.0; + RCP Am = rcp_dynamic_cast(A); + if (Am.is_null()) return -1.0; + a0_nnz = as(Am->getGlobalNumEntries()); + + // Get smoother complexity at each level + for (int i = 0; i < GetNumLevels(); ++i) { + size_t level_sc = 0; + if (!Levels_[i]->IsAvailable("PreSmoother")) continue; + RCP S = Levels_[i]->template Get >("PreSmoother"); + if (S.is_null()) continue; + level_sc = S->getNodeSmootherComplexity(); + if (level_sc == INVALID) { + global_sc = -1.0; + break; } - double min_sc=0.0; - RCP > comm =A->getDomainMap()->getComm(); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_SUM,node_sc,Teuchos::ptr(&global_sc)); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_MIN,node_sc,Teuchos::ptr(&min_sc)); - - if(min_sc < 0.0) return -1.0; - else return global_sc / a0_nnz; + node_sc += as(level_sc); } + double min_sc = 0.0; + RCP > comm = A->getDomainMap()->getComm(); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, node_sc, Teuchos::ptr(&global_sc)); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, node_sc, Teuchos::ptr(&min_sc)); + if (min_sc < 0.0) + return -1.0; + else + return global_sc / a0_nnz; +} +// Coherence checks todo in Setup() (using an helper function): +template +void Hierarchy::CheckLevel(Level& level, int levelID) { + TEUCHOS_TEST_FOR_EXCEPTION(level.lib() != lib_, Exceptions::RuntimeError, + "MueLu::Hierarchy::CheckLevel(): wrong underlying linear algebra library."); + TEUCHOS_TEST_FOR_EXCEPTION(level.GetLevelID() != levelID, Exceptions::RuntimeError, + "MueLu::Hierarchy::CheckLevel(): wrong level ID"); + TEUCHOS_TEST_FOR_EXCEPTION(levelID != 0 && level.GetPreviousLevel() != Levels_[levelID - 1], Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): wrong level parent"); +} - // Coherence 
checks todo in Setup() (using an helper function): - template - void Hierarchy::CheckLevel(Level& level, int levelID) { - TEUCHOS_TEST_FOR_EXCEPTION(level.lib() != lib_, Exceptions::RuntimeError, - "MueLu::Hierarchy::CheckLevel(): wrong underlying linear algebra library."); - TEUCHOS_TEST_FOR_EXCEPTION(level.GetLevelID() != levelID, Exceptions::RuntimeError, - "MueLu::Hierarchy::CheckLevel(): wrong level ID"); - TEUCHOS_TEST_FOR_EXCEPTION(levelID != 0 && level.GetPreviousLevel() != Levels_[levelID-1], Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): wrong level parent"); - } - - template - void Hierarchy::SetMatvecParams(RCP matvecParams) { - for (int i = 0; i < GetNumLevels(); ++i) { - RCP level = Levels_[i]; - if (level->IsAvailable("A")) { - RCP Aop = level->Get >("A"); - RCP A = rcp_dynamic_cast(Aop); - if (!A.is_null()) { - RCP xpImporter = A->getCrsGraph()->getImporter(); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = A->getCrsGraph()->getExporter(); - if (!xpExporter.is_null()) - xpExporter->setDistributorParameters(matvecParams); - } - } - if (level->IsAvailable("P")) { - RCP P = level->Get >("P"); - RCP xpImporter = P->getCrsGraph()->getImporter(); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = P->getCrsGraph()->getExporter(); - if (!xpExporter.is_null()) - xpExporter->setDistributorParameters(matvecParams); - } - if (level->IsAvailable("R")) { - RCP R = level->Get >("R"); - RCP xpImporter = R->getCrsGraph()->getImporter(); +template +void Hierarchy::SetMatvecParams(RCP matvecParams) { + for (int i = 0; i < GetNumLevels(); ++i) { + RCP level = Levels_[i]; + if (level->IsAvailable("A")) { + RCP Aop = level->Get >("A"); + RCP A = rcp_dynamic_cast(Aop); + if (!A.is_null()) { + RCP xpImporter = A->getCrsGraph()->getImporter(); if (!xpImporter.is_null()) xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = 
R->getCrsGraph()->getExporter(); + RCP xpExporter = A->getCrsGraph()->getExporter(); if (!xpExporter.is_null()) xpExporter->setDistributorParameters(matvecParams); } - if (level->IsAvailable("Importer")) { - RCP xpImporter = level->Get< RCP >("Importer"); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - } + } + if (level->IsAvailable("P")) { + RCP P = level->Get >("P"); + RCP xpImporter = P->getCrsGraph()->getImporter(); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); + RCP xpExporter = P->getCrsGraph()->getExporter(); + if (!xpExporter.is_null()) + xpExporter->setDistributorParameters(matvecParams); + } + if (level->IsAvailable("R")) { + RCP R = level->Get >("R"); + RCP xpImporter = R->getCrsGraph()->getImporter(); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); + RCP xpExporter = R->getCrsGraph()->getExporter(); + if (!xpExporter.is_null()) + xpExporter->setDistributorParameters(matvecParams); + } + if (level->IsAvailable("Importer")) { + RCP xpImporter = level->Get >("Importer"); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); } } +} - // The function uses three managers: fine, coarse and next coarse - // We construct the data for the coarse level, and do requests for the next coarse - template - bool Hierarchy::Setup(int coarseLevelID, - const RCP fineLevelManager, - const RCP coarseLevelManager, - const RCP nextLevelManager) { - // Use PrintMonitor/TimerMonitor instead of just a FactoryMonitor to print "Level 0" instead of Hierarchy(0) - // Print is done after the requests for next coarse level - - TEUCHOS_TEST_FOR_EXCEPTION(LastLevelID() < coarseLevelID, Exceptions::RuntimeError, - "MueLu::Hierarchy:Setup(): level " << coarseLevelID << " (specified by coarseLevelID argument) " - "must be built before calling this function."); - - Level& level = *Levels_[coarseLevelID]; - - std::string label = 
FormattingHelper::getColonLabel(level.getObjectLabel()); - TimeMonitor m1(*this, label + this->ShortClassName() + ": " + "Setup (total)"); - TimeMonitor m2(*this, label + this->ShortClassName() + ": " + "Setup" + " (total, level=" + Teuchos::toString(coarseLevelID) + ")"); - - // TODO: pass coarseLevelManager by reference - TEUCHOS_TEST_FOR_EXCEPTION(coarseLevelManager == Teuchos::null, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): argument coarseLevelManager cannot be null"); +// The function uses three managers: fine, coarse and next coarse +// We construct the data for the coarse level, and do requests for the next coarse +template +bool Hierarchy::Setup(int coarseLevelID, + const RCP fineLevelManager, + const RCP coarseLevelManager, + const RCP nextLevelManager) { + // Use PrintMonitor/TimerMonitor instead of just a FactoryMonitor to print "Level 0" instead of Hierarchy(0) + // Print is done after the requests for next coarse level - typedef MueLu::TopRAPFactory TopRAPFactory; - typedef MueLu::TopSmootherFactory TopSmootherFactory; + TEUCHOS_TEST_FOR_EXCEPTION(LastLevelID() < coarseLevelID, Exceptions::RuntimeError, + "MueLu::Hierarchy:Setup(): level " << coarseLevelID << " (specified by coarseLevelID argument) " + "must be built before calling this function."); - if (levelManagers_.size() < coarseLevelID+1) - levelManagers_.resize(coarseLevelID+1); - levelManagers_[coarseLevelID] = coarseLevelManager; + Level& level = *Levels_[coarseLevelID]; - bool isFinestLevel = (fineLevelManager.is_null()); - bool isLastLevel = (nextLevelManager.is_null()); + std::string label = FormattingHelper::getColonLabel(level.getObjectLabel()); + TimeMonitor m1(*this, label + this->ShortClassName() + ": " + "Setup (total)"); + TimeMonitor m2(*this, label + this->ShortClassName() + ": " + "Setup" + " (total, level=" + Teuchos::toString(coarseLevelID) + ")"); - int oldRank = -1; - if (isFinestLevel) { - RCP A = level.Get< RCP >("A"); - RCP domainMap = A->getDomainMap(); - 
RCP > comm = domainMap->getComm(); + // TODO: pass coarseLevelManager by reference + TEUCHOS_TEST_FOR_EXCEPTION(coarseLevelManager == Teuchos::null, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): argument coarseLevelManager cannot be null"); - // Initialize random seed for reproducibility - Utilities::SetRandomSeed(*comm); + typedef MueLu::TopRAPFactory TopRAPFactory; + typedef MueLu::TopSmootherFactory TopSmootherFactory; - // Record the communicator on the level (used for timers sync) - level.SetComm(comm); - oldRank = SetProcRankVerbose(comm->getRank()); + if (levelManagers_.size() < coarseLevelID + 1) + levelManagers_.resize(coarseLevelID + 1); + levelManagers_[coarseLevelID] = coarseLevelManager; - // Set the Hierarchy library to match that of the finest level matrix, - // even if it was already set - lib_ = domainMap->lib(); - level.setlib(lib_); + bool isFinestLevel = (fineLevelManager.is_null()); + bool isLastLevel = (nextLevelManager.is_null()); - } else { - // Permeate library to a coarser level - level.setlib(lib_); + int oldRank = -1; + if (isFinestLevel) { + RCP A = level.Get >("A"); + RCP domainMap = A->getDomainMap(); + RCP > comm = domainMap->getComm(); - Level& prevLevel = *Levels_[coarseLevelID-1]; - oldRank = SetProcRankVerbose(prevLevel.GetComm()->getRank()); - } + // Initialize random seed for reproducibility + Utilities::SetRandomSeed(*comm); - CheckLevel(level, coarseLevelID); + // Record the communicator on the level (used for timers sync) + level.SetComm(comm); + oldRank = SetProcRankVerbose(comm->getRank()); - // Attach FactoryManager to the fine level - RCP SFMFine; - if (!isFinestLevel) - SFMFine = rcp(new SetFactoryManager(Levels_[coarseLevelID-1], fineLevelManager)); + // Set the Hierarchy library to match that of the finest level matrix, + // even if it was already set + lib_ = domainMap->lib(); + level.setlib(lib_); - if (isFinestLevel && Levels_[coarseLevelID]->IsAvailable("Coordinates")) - 
ReplaceCoordinateMap(*Levels_[coarseLevelID]); + } else { + // Permeate library to a coarser level + level.setlib(lib_); - // Attach FactoryManager to the coarse level - SetFactoryManager SFMCoarse(Levels_[coarseLevelID], coarseLevelManager); + Level& prevLevel = *Levels_[coarseLevelID - 1]; + oldRank = SetProcRankVerbose(prevLevel.GetComm()->getRank()); + } - if (isDumpingEnabled_ && (dumpLevel_ == 0 || dumpLevel_ == -1) && coarseLevelID == 1) - DumpCurrentGraph(0); + CheckLevel(level, coarseLevelID); + + // Attach FactoryManager to the fine level + RCP SFMFine; + if (!isFinestLevel) + SFMFine = rcp(new SetFactoryManager(Levels_[coarseLevelID - 1], fineLevelManager)); + + if (isFinestLevel && Levels_[coarseLevelID]->IsAvailable("Coordinates")) + ReplaceCoordinateMap(*Levels_[coarseLevelID]); + + // Attach FactoryManager to the coarse level + SetFactoryManager SFMCoarse(Levels_[coarseLevelID], coarseLevelManager); + + if (isDumpingEnabled_ && (dumpLevel_ == 0 || dumpLevel_ == -1) && coarseLevelID == 1) + DumpCurrentGraph(0); + + RCP coarseFact; + RCP smootherFact = rcp(new TopSmootherFactory(coarseLevelManager, "Smoother")); + + int nextLevelID = coarseLevelID + 1; + + RCP SFMNext; + if (isLastLevel == false) { + // We are not at the coarsest level, so there is going to be another level ("next coarse") after this one ("coarse") + if (nextLevelID > LastLevelID()) + AddNewLevel(); + CheckLevel(*Levels_[nextLevelID], nextLevelID); + + // Attach FactoryManager to the next level (level after coarse) + SFMNext = rcp(new SetFactoryManager(Levels_[nextLevelID], nextLevelManager)); + Levels_[nextLevelID]->Request(TopRAPFactory(coarseLevelManager, nextLevelManager)); + + // Do smoother requests here. 
We don't know whether this is going to be + // the coarsest level or not, but we need to DeclareInput before we call + // coarseRAPFactory.Build(), otherwise some stuff may be erased after + // level releases + level.Request(*smootherFact); + + } else { + // Similar to smoother above, do the coarse solver request here. We don't + // know whether this is going to be the coarsest level or not, but we + // need to DeclareInput before we call coarseRAPFactory.Build(), + // otherwise some stuff may be erased after level releases. This is + // actually evident on ProjectorSmoother. It requires both "A" and + // "Nullspace". However, "Nullspace" is erased after all releases, so if + // we call the coarse factory request after RAP build we would not have + // any data, and cannot get it as we don't have previous managers. The + // typical trace looks like this: + // + // MueLu::Level(0)::GetFactory(Aggregates, 0): No FactoryManager + // during request for data " Aggregates" on level 0 by factory TentativePFactory + // during request for data " P" on level 1 by factory EminPFactory + // during request for data " P" on level 1 by factory TransPFactory + // during request for data " R" on level 1 by factory RAPFactory + // during request for data " A" on level 1 by factory TentativePFactory + // during request for data " Nullspace" on level 2 by factory NullspaceFactory + // during request for data " Nullspace" on level 2 by factory NullspacePresmoothFactory + // during request for data " Nullspace" on level 2 by factory ProjectorSmoother + // during request for data " PreSmoother" on level 2 by factory NoFactory + if (coarseFact.is_null()) + coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + level.Request(*coarseFact); + } - RCP coarseFact; - RCP smootherFact = rcp(new TopSmootherFactory(coarseLevelManager, "Smoother")); + GetOStream(Runtime0) << std::endl; + PrintMonitor m0(*this, "Level " + Teuchos::toString(coarseLevelID), static_cast(Runtime0 
| Test)); - int nextLevelID = coarseLevelID + 1; + // Build coarse level hierarchy + RCP Ac = Teuchos::null; + TopRAPFactory coarseRAPFactory(fineLevelManager, coarseLevelManager); - RCP SFMNext; - if (isLastLevel == false) { - // We are not at the coarsest level, so there is going to be another level ("next coarse") after this one ("coarse") - if (nextLevelID > LastLevelID()) - AddNewLevel(); - CheckLevel(*Levels_[nextLevelID], nextLevelID); + if (level.IsAvailable("A")) { + Ac = level.Get >("A"); + } else if (!isFinestLevel) { + // We only build here, the release is done later + coarseRAPFactory.Build(*level.GetPreviousLevel(), level); + } - // Attach FactoryManager to the next level (level after coarse) - SFMNext = rcp(new SetFactoryManager(Levels_[nextLevelID], nextLevelManager)); - Levels_[nextLevelID]->Request(TopRAPFactory(coarseLevelManager, nextLevelManager)); + bool setLastLevelviaMaxCoarseSize = false; + if (level.IsAvailable("A")) + Ac = level.Get >("A"); + RCP Acm = rcp_dynamic_cast(Ac); + + // Record the communicator on the level + if (!Ac.is_null()) + level.SetComm(Ac->getDomainMap()->getComm()); + + // Test if we reach the end of the hierarchy + bool isOrigLastLevel = isLastLevel; + if (isLastLevel) { + // Last level as we have achieved the max limit + isLastLevel = true; + + } else if (Ac.is_null()) { + // Last level for this processor, as it does not belong to the next + // subcommunicator. Other processors may continue working on the + // hierarchy + isLastLevel = true; + + } else { + if (!Acm.is_null() && Acm->getGlobalNumRows() <= maxCoarseSize_) { + // Last level as the size of the coarse matrix became too small + GetOStream(Runtime0) << "Max coarse size (<= " << maxCoarseSize_ << ") achieved" << std::endl; + isLastLevel = true; + if (Acm->getGlobalNumRows() != 0) setLastLevelviaMaxCoarseSize = true; + } + } - // Do smoother requests here. 
We don't know whether this is going to be - // the coarsest level or not, but we need to DeclareInput before we call - // coarseRAPFactory.Build(), otherwise some stuff may be erased after - // level releases - level.Request(*smootherFact); + if (!Ac.is_null() && !isFinestLevel) { + RCP A = Levels_[coarseLevelID - 1]->template Get >("A"); + RCP Am = rcp_dynamic_cast(A); + + const double maxCoarse2FineRatio = 0.8; + if (!Acm.is_null() && !Am.is_null() && Acm->getGlobalNumRows() > maxCoarse2FineRatio * Am->getGlobalNumRows()) { + // We could abort here, but for now we simply notify user. + // Couple of additional points: + // - if repartitioning is delayed until level K, but the aggregation + // procedure stagnates between levels K-1 and K. In this case, + // repartitioning could enable faster coarsening once again, but the + // hierarchy construction will abort due to the stagnation check. + // - if the matrix is small enough, we could move it to one processor. + GetOStream(Warnings0) << "Aggregation stagnated. Please check your matrix and/or adjust your configuration file." + << "Possible fixes:\n" + << " - reduce the maximum number of levels\n" + << " - enable repartitioning\n" + << " - increase the minimum coarse size." << std::endl; + } + } - } else { - // Similar to smoother above, do the coarse solver request here. We don't - // know whether this is going to be the coarsest level or not, but we - // need to DeclareInput before we call coarseRAPFactory.Build(), - // otherwise some stuff may be erased after level releases. This is - // actually evident on ProjectorSmoother. It requires both "A" and - // "Nullspace". However, "Nullspace" is erased after all releases, so if - // we call the coarse factory request after RAP build we would not have - // any data, and cannot get it as we don't have previous managers. 
The - // typical trace looks like this: - // - // MueLu::Level(0)::GetFactory(Aggregates, 0): No FactoryManager - // during request for data " Aggregates" on level 0 by factory TentativePFactory - // during request for data " P" on level 1 by factory EminPFactory - // during request for data " P" on level 1 by factory TransPFactory - // during request for data " R" on level 1 by factory RAPFactory - // during request for data " A" on level 1 by factory TentativePFactory - // during request for data " Nullspace" on level 2 by factory NullspaceFactory - // during request for data " Nullspace" on level 2 by factory NullspacePresmoothFactory - // during request for data " Nullspace" on level 2 by factory ProjectorSmoother - // during request for data " PreSmoother" on level 2 by factory NoFactory + if (isLastLevel) { + if (!isOrigLastLevel) { + // We did not expect to finish this early so we did request a smoother. + // We need a coarse solver instead. Do the magic. + level.Release(*smootherFact); if (coarseFact.is_null()) coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); level.Request(*coarseFact); } - GetOStream(Runtime0) << std::endl; - PrintMonitor m0(*this, "Level " + Teuchos::toString(coarseLevelID), static_cast(Runtime0 | Test)); - - // Build coarse level hierarchy - RCP Ac = Teuchos::null; - TopRAPFactory coarseRAPFactory(fineLevelManager, coarseLevelManager); - - if (level.IsAvailable("A")) { - Ac = level.Get >("A"); - } else if (!isFinestLevel) { - // We only build here, the release is done later - coarseRAPFactory.Build(*level.GetPreviousLevel(), level); - } + // Do the actual build, if we have any data. + // NOTE: this is not a great check, we may want to call Build() regardless. + if (!Ac.is_null()) + coarseFact->Build(level); - bool setLastLevelviaMaxCoarseSize = false; - if (level.IsAvailable("A")) - Ac = level.Get >("A"); - RCP Acm = rcp_dynamic_cast(Ac); + // Once the dirty deed is done, release stuff. 
The smoother has already + // been released. + level.Release(*coarseFact); - // Record the communicator on the level + } else { + // isLastLevel = false => isOrigLastLevel = false, meaning that we have + // requested the smoother. Now we need to build it and to release it. + // We don't need to worry about the coarse solver, as we didn't request it. if (!Ac.is_null()) - level.SetComm(Ac->getDomainMap()->getComm()); + smootherFact->Build(level); - // Test if we reach the end of the hierarchy - bool isOrigLastLevel = isLastLevel; - if (isLastLevel) { - // Last level as we have achieved the max limit - isLastLevel = true; - - } else if (Ac.is_null()) { - // Last level for this processor, as it does not belong to the next - // subcommunicator. Other processors may continue working on the - // hierarchy - isLastLevel = true; + level.Release(*smootherFact); + } - } else { - if (!Acm.is_null() && Acm->getGlobalNumRows() <= maxCoarseSize_) { - // Last level as the size of the coarse matrix became too small - GetOStream(Runtime0) << "Max coarse size (<= " << maxCoarseSize_ << ") achieved" << std::endl; - isLastLevel = true; - if (Acm->getGlobalNumRows() != 0) setLastLevelviaMaxCoarseSize = true; + if (isLastLevel == true) { + int actualNumLevels = nextLevelID; + if (isOrigLastLevel == false) { + // Earlier in the function, we constructed the next coarse level, and requested data for the that level, + // assuming that we are not at the coarsest level. Now, we changed our mind, so we have to release those. + Levels_[nextLevelID]->Release(TopRAPFactory(coarseLevelManager, nextLevelManager)); + + // We truncate/resize the hierarchy and possibly remove the last created level if there is + // something wrong with it as indicated by its P not being valid. 
This might happen + // if the global number of aggregates turns out to be zero + + if (!setLastLevelviaMaxCoarseSize) { + if (Levels_[nextLevelID - 1]->IsAvailable("P")) { + if (Levels_[nextLevelID - 1]->template Get >("P") == Teuchos::null) actualNumLevels = nextLevelID - 1; + } else + actualNumLevels = nextLevelID - 1; } } + if (actualNumLevels == nextLevelID - 1) { + // Didn't expect to finish early so we requested smoother but need coarse solver instead. + Levels_[nextLevelID - 2]->Release(*smootherFact); - if (!Ac.is_null() && !isFinestLevel) { - RCP A = Levels_[coarseLevelID-1]->template Get< RCP >("A"); - RCP Am = rcp_dynamic_cast(A); - - const double maxCoarse2FineRatio = 0.8; - if (!Acm.is_null() && !Am.is_null() && Acm->getGlobalNumRows() > maxCoarse2FineRatio * Am->getGlobalNumRows()) { - // We could abort here, but for now we simply notify user. - // Couple of additional points: - // - if repartitioning is delayed until level K, but the aggregation - // procedure stagnates between levels K-1 and K. In this case, - // repartitioning could enable faster coarsening once again, but the - // hierarchy construction will abort due to the stagnation check. - // - if the matrix is small enough, we could move it to one processor. - GetOStream(Warnings0) << "Aggregation stagnated. Please check your matrix and/or adjust your configuration file." - << "Possible fixes:\n" - << " - reduce the maximum number of levels\n" - << " - enable repartitioning\n" - << " - increase the minimum coarse size." 
<< std::endl; - - } + if (Levels_[nextLevelID - 2]->IsAvailable("PreSmoother")) Levels_[nextLevelID - 2]->RemoveKeepFlag("PreSmoother", NoFactory::get()); + if (Levels_[nextLevelID - 2]->IsAvailable("PostSmoother")) Levels_[nextLevelID - 2]->RemoveKeepFlag("PostSmoother", NoFactory::get()); + if (coarseFact.is_null()) + coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + Levels_[nextLevelID - 2]->Request(*coarseFact); + if (!(Levels_[nextLevelID - 2]->template Get >("A").is_null())) + coarseFact->Build(*(Levels_[nextLevelID - 2])); + Levels_[nextLevelID - 2]->Release(*coarseFact); } + Levels_.resize(actualNumLevels); + } - if (isLastLevel) { - if (!isOrigLastLevel) { - // We did not expect to finish this early so we did request a smoother. - // We need a coarse solver instead. Do the magic. - level.Release(*smootherFact); - if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); - level.Request(*coarseFact); - } + // I think this is the proper place for graph so that it shows every dependence + if (isDumpingEnabled_ && ((dumpLevel_ > 0 && coarseLevelID == dumpLevel_) || dumpLevel_ == -1)) + DumpCurrentGraph(coarseLevelID); - // Do the actual build, if we have any data. - // NOTE: this is not a great check, we may want to call Build() regardless. - if (!Ac.is_null()) - coarseFact->Build(level); + if (!isFinestLevel) { + // Release the hierarchy data + // We release so late to help blocked solvers, as the smoothers for them need A blocks + // which we construct in RAPFactory + level.Release(coarseRAPFactory); + } - // Once the dirty deed is done, release stuff. The smoother has already - // been released. - level.Release(*coarseFact); + if (oldRank != -1) + SetProcRankVerbose(oldRank); - } else { - // isLastLevel = false => isOrigLastLevel = false, meaning that we have - // requested the smoother. Now we need to build it and to release it. 
- // We don't need to worry about the coarse solver, as we didn't request it. - if (!Ac.is_null()) - smootherFact->Build(level); + return isLastLevel; +} - level.Release(*smootherFact); - } +template +void Hierarchy::SetupRe() { + int numLevels = Levels_.size(); + TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_.size() != numLevels, Exceptions::RuntimeError, + "Hierarchy::SetupRe: " << Levels_.size() << " levels, but " << levelManagers_.size() << " level factory managers"); - if (isLastLevel == true) { - int actualNumLevels = nextLevelID; - if (isOrigLastLevel == false) { - // Earlier in the function, we constructed the next coarse level, and requested data for the that level, - // assuming that we are not at the coarsest level. Now, we changed our mind, so we have to release those. - Levels_[nextLevelID]->Release(TopRAPFactory(coarseLevelManager, nextLevelManager)); + const int startLevel = 0; + Clear(startLevel); - // We truncate/resize the hierarchy and possibly remove the last created level if there is - // something wrong with it as indicated by its P not being valid. This might happen - // if the global number of aggregates turns out to be zero +#ifdef HAVE_MUELU_DEBUG + // Reset factories' data used for debugging + for (int i = 0; i < numLevels; i++) + levelManagers_[i]->ResetDebugData(); +#endif - if (!setLastLevelviaMaxCoarseSize) { - if (Levels_[nextLevelID-1]->IsAvailable("P")) { - if (Levels_[nextLevelID-1]->template Get >("P") == Teuchos::null) actualNumLevels = nextLevelID-1; - } - else actualNumLevels = nextLevelID-1; - } - } - if (actualNumLevels == nextLevelID-1) { - // Didn't expect to finish early so we requested smoother but need coarse solver instead. 
- Levels_[nextLevelID-2]->Release(*smootherFact); - - if (Levels_[nextLevelID-2]->IsAvailable("PreSmoother") ) Levels_[nextLevelID-2]->RemoveKeepFlag("PreSmoother" ,NoFactory::get()); - if (Levels_[nextLevelID-2]->IsAvailable("PostSmoother")) Levels_[nextLevelID-2]->RemoveKeepFlag("PostSmoother",NoFactory::get()); - if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); - Levels_[nextLevelID-2]->Request(*coarseFact); - if ( !(Levels_[nextLevelID-2]->template Get >("A").is_null() )) - coarseFact->Build( *(Levels_[nextLevelID-2])); - Levels_[nextLevelID-2]->Release(*coarseFact); - } - Levels_.resize(actualNumLevels); - } + int levelID; + for (levelID = startLevel; levelID < numLevels;) { + bool r = Setup(levelID, + (levelID != 0 ? levelManagers_[levelID - 1] : Teuchos::null), + levelManagers_[levelID], + (levelID + 1 != numLevels ? levelManagers_[levelID + 1] : Teuchos::null)); + levelID++; + if (r) break; + } + // We may construct fewer levels for some reason, make sure we continue + // doing that in the future + Levels_.resize(levelID); + levelManagers_.resize(levelID); - // I think this is the proper place for graph so that it shows every dependence - if (isDumpingEnabled_ && ( (dumpLevel_ > 0 && coarseLevelID == dumpLevel_) || dumpLevel_ == -1 ) ) - DumpCurrentGraph(coarseLevelID); + int sizeOfVecs = sizeOfAllocatedLevelMultiVectors_; - if (!isFinestLevel) { - // Release the hierarchy data - // We release so late to help blocked solvers, as the smoothers for them need A blocks - // which we construct in RAPFactory - level.Release(coarseRAPFactory); - } + AllocateLevelMultiVectors(sizeOfVecs, true); - if (oldRank != -1) - SetProcRankVerbose(oldRank); + // since the # of levels, etc. 
may have changed, force re-determination of description during next call to description() + ResetDescription(); - return isLastLevel; - } + describe(GetOStream(Statistics0), GetVerbLevel()); +} - template - void Hierarchy::SetupRe() { - int numLevels = Levels_.size(); - TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_.size() != numLevels, Exceptions::RuntimeError, - "Hierarchy::SetupRe: " << Levels_.size() << " levels, but " << levelManagers_.size() << " level factory managers"); +template +void Hierarchy::Setup(const FactoryManagerBase& manager, int startLevel, int numDesiredLevels) { + // Use MueLu::BaseClass::description() to avoid printing "{numLevels = 1}" (numLevels is increasing...) + PrintMonitor m0(*this, "Setup (" + this->MueLu::BaseClass::description() + ")", Runtime0); - const int startLevel = 0; - Clear(startLevel); + Clear(startLevel); -#ifdef HAVE_MUELU_DEBUG - // Reset factories' data used for debugging - for (int i = 0; i < numLevels; i++) - levelManagers_[i]->ResetDebugData(); + // Check Levels_[startLevel] exists. + TEUCHOS_TEST_FOR_EXCEPTION(Levels_.size() <= startLevel, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") does not exist"); -#endif + TEUCHOS_TEST_FOR_EXCEPTION(numDesiredLevels <= 0, Exceptions::RuntimeError, + "Constructing non-positive (" << numDesiredLevels << ") number of levels does not make sense."); - int levelID; - for (levelID = startLevel; levelID < numLevels;) { - bool r = Setup(levelID, - (levelID != 0 ? levelManagers_[levelID-1] : Teuchos::null), - levelManagers_[levelID], - (levelID+1 != numLevels ? 
levelManagers_[levelID+1] : Teuchos::null)); - levelID++; - if (r) break; - } - // We may construct fewer levels for some reason, make sure we continue - // doing that in the future - Levels_ .resize(levelID); - levelManagers_.resize(levelID); + // Check for fine level matrix A + TEUCHOS_TEST_FOR_EXCEPTION(!Levels_[startLevel]->IsAvailable("A"), Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") has no matrix A! " + "Set fine level matrix A using Level.Set()"); - int sizeOfVecs = sizeOfAllocatedLevelMultiVectors_; + RCP A = Levels_[startLevel]->template Get >("A"); + lib_ = A->getDomainMap()->lib(); - AllocateLevelMultiVectors(sizeOfVecs, true); + if (IsPrint(Statistics2)) { + RCP Amat = rcp_dynamic_cast(A); - // since the # of levels, etc. may have changed, force re-determination of description during next call to description() - ResetDescription(); + if (!Amat.is_null()) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); - describe(GetOStream(Statistics0), GetVerbLevel()); + GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); + } else { + GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; + } } - template - void Hierarchy::Setup(const FactoryManagerBase& manager, int startLevel, int numDesiredLevels) { - // Use MueLu::BaseClass::description() to avoid printing "{numLevels = 1}" (numLevels is increasing...) 
- PrintMonitor m0(*this, "Setup (" + this->MueLu::BaseClass::description() + ")", Runtime0); + RCP rcpmanager = rcpFromRef(manager); + + const int lastLevel = startLevel + numDesiredLevels - 1; + GetOStream(Runtime0) << "Setup loop: startLevel = " << startLevel << ", lastLevel = " << lastLevel + << " (stop if numLevels = " << numDesiredLevels << " or Ac.size() < " << maxCoarseSize_ << ")" << std::endl; + + // Setup multigrid levels + int iLevel = 0; + if (numDesiredLevels == 1) { + iLevel = 0; + Setup(startLevel, Teuchos::null, rcpmanager, Teuchos::null); // setup finest==coarsest level (first and last managers are Teuchos::null) + + } else { + bool bIsLastLevel = Setup(startLevel, Teuchos::null, rcpmanager, rcpmanager); // setup finest level (level 0) (first manager is Teuchos::null) + if (bIsLastLevel == false) { + for (iLevel = startLevel + 1; iLevel < lastLevel; iLevel++) { + bIsLastLevel = Setup(iLevel, rcpmanager, rcpmanager, rcpmanager); // setup intermediate levels + if (bIsLastLevel == true) + break; + } + if (bIsLastLevel == false) + Setup(lastLevel, rcpmanager, rcpmanager, Teuchos::null); // setup coarsest level (last manager is Teuchos::null) + } + } - Clear(startLevel); + // TODO: some check like this should be done at the beginning of the routine + TEUCHOS_TEST_FOR_EXCEPTION(iLevel != Levels_.size() - 1, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): number of level"); - // Check Levels_[startLevel] exists. 
- TEUCHOS_TEST_FOR_EXCEPTION(Levels_.size() <= startLevel, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") does not exist"); + // TODO: this is not exception safe: manager will still hold default + // factories if you exit this function with an exception + manager.Clean(); - TEUCHOS_TEST_FOR_EXCEPTION(numDesiredLevels <= 0, Exceptions::RuntimeError, - "Constructing non-positive (" << numDesiredLevels << ") number of levels does not make sense."); + describe(GetOStream(Statistics0), GetVerbLevel()); +} - // Check for fine level matrix A - TEUCHOS_TEST_FOR_EXCEPTION(!Levels_[startLevel]->IsAvailable("A"), Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") has no matrix A! " - "Set fine level matrix A using Level.Set()"); +template +void Hierarchy::Clear(int startLevel) { + if (startLevel < GetNumLevels()) + GetOStream(Runtime0) << "Clearing old data (if any)" << std::endl; - RCP A = Levels_[startLevel]->template Get >("A"); - lib_ = A->getDomainMap()->lib(); + for (int iLevel = startLevel; iLevel < GetNumLevels(); iLevel++) + Levels_[iLevel]->Clear(); +} - if (IsPrint(Statistics2)) { - RCP Amat = rcp_dynamic_cast(A); +template +void Hierarchy::ExpertClear() { + GetOStream(Runtime0) << "Clearing old data (expert)" << std::endl; + for (int iLevel = 0; iLevel < GetNumLevels(); iLevel++) + Levels_[iLevel]->ExpertClear(); +} - if (!Amat.is_null()) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); +#if defined(HAVE_MUELU_EXPERIMENTAL) && defined(HAVE_MUELU_ADDITIVE_VARIANT) +template +ConvergenceStatus Hierarchy::Iterate(const MultiVector& B, MultiVector& X, ConvData conv, + bool InitialGuessIsZero, LO startLevel) { + LO nIts = conv.maxIts_; + MagnitudeType tol = conv.tol_; - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); - } else { - GetOStream(Warnings1) << "Fine level operator is not a 
matrix, statistics are not available" << std::endl; - } - } + std::string prefix = this->ShortClassName() + ": "; + std::string levelSuffix = " (level=" + toString(startLevel) + ")"; + std::string levelSuffix1 = " (level=" + toString(startLevel + 1) + ")"; - RCP rcpmanager = rcpFromRef(manager); + using namespace Teuchos; + RCP