Merge branch 'develop' into rm_DMR

maki49 · May 4, 2024 · f52a25a · f52a25a
2 parents 2f247a5 + 1439426
commit f52a25a
Show file tree

Hide file tree

Showing 56 changed files with 1,265 additions and 329 deletions.
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
@@ -4,88 +4,21 @@ on:
   workflow_dispatch:
 
 jobs:
-  start-runner:
-    name: Start self-hosted EC2 runner
-    runs-on: ubuntu-latest
-    outputs:
-      label: ${{ steps.start-ec2-runner.outputs.label }}
-      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
-    steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: us-east-2
-      - name: Start EC2 runner
-        id: start-ec2-runner
-        uses: machulav/ec2-github-runner@v2
-        with:
-          mode: start
-          github-token: ${{ secrets.PAT }}
-          ec2-image-id: ami-04cd9fec4a7a39019
-          ec2-instance-type: g4dn.xlarge
-          subnet-id: subnet-72d3e53e
-          security-group-id: sg-06b0c93122c08aeab
-
   test:
-    name: Do the job on the runner
-    needs: start-runner # required to start the main job when the runner is ready
-    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
+    name: Test on CUDA Build
+    runs-on: nvidia
     container:
       image: ghcr.io/deepmodeling/abacus-cuda
       options: --gpus all
     steps:
       - name: Checkout
         uses: actions/checkout@v4
-      - name: Build cuSolver
+        with:
+          submodules: recursive
+      - name: Build
         run: |
           nvidia-smi
-          cmake -B build -DUSE_CUSOLVER_LCAO=ON
+          cmake -B build -DUSE_CUDA=ON -DBUILD_TESTING=ON
           cmake --build build -j4
           cmake --install build
-          cmake -B build -DBUILD_TESTING=ON
-          cmake --build build -j4 --target hsolver_diago
-      - name: Test e2e
-        run: |
-          export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-          cd tests/integrate
-          echo "ks_solver cusolver" >> ./270_NO_MD_2O/INPUT
-          ./Autotest.sh -r 270_NO_MD_2O
-      - name: Test UT
-        run: |
-          cd source/src_pdiag/test/
-          cp ../../../build/source/src_pdiag/test/hsolver_diago .
-          ./hsolver_diago
-          bash diago_parallel_test.sh
-      - name: Test performance
-        run: |
-          cd examples/performance
-          ls -d P1*lcao* > allcase
-          sed -i '/ks_solver/d' P1*lcao*/INPUT
-          sed -i '$a ks_solver cusolver' P1*lcao*/INPUT
-          bash run.sh
-          cat sumall.dat
-
-
-  stop-runner:
-    name: Stop self-hosted EC2 runner
-    needs:
-      - start-runner # required to get output from the start-runner job
-      - test # required to wait when the main job is done
-    runs-on: ubuntu-latest
-    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
-    steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: us-east-2
-      - name: Stop EC2 runner
-        uses: machulav/ec2-github-runner@v2
-        with:
-          mode: stop
-          github-token: ${{ secrets.PAT }}
-          label: ${{ needs.start-runner.outputs.label }}
-          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
+      # TODO: add tests
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -37,6 +37,7 @@ option(COMMIT_INFO "Print commit information in log" ON)
 option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
 option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF)
 option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF)
+option(ENABLE_CNPY "Enable cnpy usage." OFF)
 
 # enable json support
 if(ENABLE_RAPIDJSON)
@@ -467,6 +468,27 @@ if(ENABLE_DEEPKS)
   add_compile_definitions(__DEEPKS)
 endif()
 
+if (ENABLE_CNPY)
+  find_path(cnpy_SOURCE_DIR
+    cnpy.h
+    HINTS ${libnpy_INCLUDE_DIR}
+  )
+  if(NOT cnpy_SOURCE_DIR)
+    include(FetchContent)
+    FetchContent_Declare(
+      cnpy
+      GIT_REPOSITORY https://github.com/rogersce/cnpy.git
+      GIT_PROGRESS TRUE
+    )
+    FetchContent_MakeAvailable(cnpy)
+  else()
+    include_directories(${cnpy_INCLUDE_DIR})
+  endif()
+  include_directories(${cnpy_SOURCE_DIR})
+  target_link_libraries(${ABACUS_BIN_NAME} cnpy)
+  add_compile_definitions(__USECNPY)
+endif()
+
 function(git_submodule_update)
   if(GIT_SUBMOD_RESULT EQUAL "0")
     message(DEBUG "Submodule init'ed")

diff --git a/Dockerfile.intel b/Dockerfile.intel
@@ -16,18 +16,16 @@ RUN apt-get update && \
         intel-oneapi-compiler-dpcpp-cpp \
         intel-oneapi-compiler-fortran \
         intel-oneapi-mkl-devel \
-        intel-oneapi-mpi-devel \
+        intel-oneapi-mpi-devel="2021.11.*" \
         intel-oneapi-vtune
-
-
-ENV I_MPI_ROOT='/opt/intel/oneapi/mpi/latest' \
+ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest \
     LIBRARY_PATH=/opt/intel/oneapi/tbb/latest/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/ippcp/latest/lib/:/opt/intel/oneapi/ipp/latest/lib:/opt/intel/oneapi/dpl/latest/lib:/opt/intel/oneapi/dnnl/latest/lib:/opt/intel/oneapi/dal/latest/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/ccl/latest/lib/ \
     LD_LIBRARY_PATH=/opt/intel/oneapi/tbb/latest/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/itac/latest/slib:/opt/intel/oneapi/ippcp/latest/lib/:/opt/intel/oneapi/ipp/latest/lib:/opt/intel/oneapi/dpl/latest/lib:/opt/intel/oneapi/dnnl/latest/lib:/opt/intel/oneapi/debugger/latest/opt/debugger/lib:/opt/intel/oneapi/dal/latest/lib:/opt/intel/oneapi/compiler/latest/opt/oclfpga/host/linux64/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/ccl/latest/lib/ \
     PATH=/opt/intel/oneapi/vtune/latest/bin64:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/itac/latest/bin:/opt/intel/oneapi/inspector/latest/bin64:/opt/intel/oneapi/dpcpp-ct/latest/bin:/opt/intel/oneapi/dev-utilities/latest/bin:/opt/intel/oneapi/debugger/latest/opt/debugger/bin:/opt/intel/oneapi/compiler/latest/opt/oclfpga/bin:/opt/intel/oneapi/compiler/latest/bin:/opt/intel/oneapi/advisor/latest/bin64:/opt/mamba/bin:/opt/mamba/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
-    MKLROOT='/opt/intel/oneapi/mkl/latest' \
-    FI_PROVIDER_PATH='/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric' \
-    CMAKE_PREFIX_PATH='/opt/intel/oneapi/tbb/latest/env/..:/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/ipp/latest/lib/cmake/ipp:/opt/intel/oneapi/dpl/latest/lib/cmake/oneDPL:/opt/intel/oneapi/dnnl/latest/lib/cmake:/opt/intel/oneapi/dal/latest:/opt/intel/oneapi/compiler/latest' \
-    CMPLR_ROOT='/opt/intel/oneapi/compiler/latest'
+    MKLROOT=/opt/intel/oneapi/mkl/latest \
+    FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric \
+    CMAKE_PREFIX_PATH=/opt/intel/oneapi/tbb/latest/env/..:/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/ipp/latest/lib/cmake/ipp:/opt/intel/oneapi/dpl/latest/lib/cmake/oneDPL:/opt/intel/oneapi/dnnl/latest/lib/cmake:/opt/intel/oneapi/dal/latest:/opt/intel/oneapi/compiler/latest \
+    CMPLR_ROOT=/opt/intel/oneapi/compiler/latest
 
 SHELL ["/bin/bash", "-c"]
 ENV CC=mpiicx CXX=mpiicpx FC=mpiifx

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
@@ -438,7 +438,7 @@ These variables are used to control general system parameters.
 
 - **Type**: Integer
 - **Description**: takes value 1, 0 or -1.
-  - -1: No symmetry will be considered.
+  - -1: No symmetry will be considered. It is recommended to set -1 for non-collinear + SOC calculations, where time-reversal symmetry may be broken.
   - 0: Only time reversal symmetry would be considered in symmetry operations, which implied k point and -k point would be treated as a single k point with twice the weight.
   - 1: Symmetry analysis will be performed to determine the type of Bravais lattice and associated symmetry operations. (point groups, space groups, primitive cells, and irreducible k-points)
 - **Default**: 
@@ -1607,6 +1607,20 @@ These variables are used to control the output of properties.
 - **Description**: Whether to print the upper triangular part of the exchange-correlation matrices in **Kohn-Sham orbital representation** (unit: Ry): $\braket{\psi_i|V_\text{xc}^\text{(semi-)local}+V_\text{exx}+V_\text{DFTU}|\psi_j}$ for each k point into files in the directory `OUT.${suffix}`, which is useful for the subsequent GW calculation. (Note that currently DeePKS term is not included. ) The files are named `k-$k-Vxc`, the meaning of `$k`corresponding to k point and spin  is same as [hs_matrix.md](../elec_properties/hs_matrix.md#out_mat_hs).
 - **Default**: False
 
+### out_hr_npz/out_dm_npz
+
+- **Type**: Boolean
+- **Availability**: Numerical atomic orbital basis
+- **Description**: Whether to print Hamiltonian matrices H(R)/density matrices DM(R) in npz format. This feature does not work for gamma-only calculations. Currently only intended for internal usage.
+- **Default**: False
+
+### dm_to_rho
+
+- **Type**: Boolean
+- **Availability**: Numerical atomic orbital basis
+- **Description**: Reads density matrix DM(R) in npz format and creates electron density on grids. This feature does not work for gamma-only calculations. Only supports serial calculations. Currently only intended for internal usage.
+- **Default**: False
+
 ### out_app_flag
 
 - **Type**: Boolean

diff --git a/docs/advanced/scf/converge.md b/docs/advanced/scf/converge.md
@@ -10,6 +10,8 @@ For each of the mixing types, we also provide variables for controlling relevant
 
 `mixing_ndim` is the mixing dimensions in DIIS (broyden or pulay) mixing. Gerenally, a larger `mixing_ndim` leads to a better convergence. the default choice `mixing_ndim=8` should work fine in most cases. For `mixing_type`, the default choice is `broyden`, which is slightly better than `Pulay` typically. Besides that, a large `mixing_beta` means a larger change in electron density for each SCF step. For well-behaved systems, a larger `mixing_beta` leads to faster convergence. However, for some difficult cases, a smaller `mixing_beta` is preferred to avoid numerical instabilities.
 
+For most isolated systems, Kerker preconditioning is unnecessary. You can turn it off by setting `mixing_gg0  0.0` to get faster convergence.
+
 For non-spin-polarized calculations, the default choices usually achieve convergence. If convergence issue arises in metallic systems, you can try different value of Kerker preconditioning [mixing_gg0](../input_files/input-main.md#mixing_gg0) and [mixing_gg0_min](../input_files/input-main.md#mixing_gg0_min), and try to reduce `mixing_beta`, which is 0.8 defaultly for `nspin=1`.
 
 For magnetic calculations, `mixing_beta_mag` and `mixing_gg0_mag` are activated. Considering collinear calculations, you can rely on the default value for most cases. If convergence issue arises, you can try to reduce `mixing_beta` and `mixing_beta_mag` together. For non-collinear calculations, tradtional broyden usually works, especially for a given magnetic configuration. If one is not interested in the energies of a given magnetic configuration but wants to determine the ground state by relaxing the magnetic moments’ directions, the standard Broyden mixing algorithm sometimes fails to find the correct magnetic configuration. If so, we can set [mixing_angle=1.0](../input_files/input-main.md#mixing_angle), which is a promising mixing method proposed by J. Phys. Soc. Jpn. 82 (2013) 114706.

diff --git a/docs/community/faq.md b/docs/community/faq.md
@@ -80,6 +80,8 @@ If the program prompt something like "KILLED BY SIGNAL: 9 (Killed)", it may be c
 
 If the error message is "Segmentation fault", or there is no enough information on the error, please feel free to submit an issue.
 
+**4. Error "Read -1" when using mpirun in a Docker environment**
+This is a [known issue](https://github.com/open-mpi/ompi/issues/4948) of OpenMPI running in a Docker container. In this case, please set the environment variable `OMPI_MCA_btl_vader_single_copy_mechanism=none`.
 ## Miscellaneous
 
 **1. How to visualize charge density file?**

diff --git a/source/Makefile.Objects b/source/Makefile.Objects
@@ -222,6 +222,7 @@ OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
       esolver_ks_lcao_elec.o\
       esolver_ks_lcao_tddft.o\
       esolver_ks_lcao_tmpfunc.o\
+      io_npz.o\
 
 OBJS_GINT=gint.o\
       gint_gamma.o\

diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
@@ -283,6 +283,9 @@ bool out_bandgap = false; // QO added for bandgap printing
 int out_interval = 1;    // convert from out_hsR_interval liuyu 2023-04-18
 
 bool out_mat_xc = false; // output Vxc in KS-wfc representation for GW calculation
+bool out_hr_npz = false;
+bool out_dm_npz = false;
+bool dm_to_rho = false; // reads dm in npz format, then prints density in cube format
 
 //==========================================================
 // Deltaspin related

diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
@@ -313,6 +313,9 @@ extern bool out_bandgap;
 extern int out_interval;
 
 extern bool out_mat_xc; // output Vxc in KS-wfc representation for GW calculation
+extern bool out_hr_npz; //writes h(r) in npz format
+extern bool out_dm_npz; //writes dm(r) in npz format
+extern bool dm_to_rho; //reads in dm(r) and creates density
 
 // Deltaspin related
 extern bool sc_mag_switch; // 0: no deltaspin; 1: constrain atomic magnetic moments;

diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp
@@ -1003,6 +1003,7 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int
 
 	if(type==1)
 	{
+		int nat_tmp = 0;
 		ofs << "Cartesian" << std::endl;
 		for(int it=0; it<ntype; it++)
 		{
@@ -1028,28 +1029,31 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int
                     ofs << context.str();
                 }
 
-                if (GlobalV::NSPIN == 2)
+                if (GlobalV::NSPIN == 2 && GlobalV::out_mul)
                 {
                     // output magnetic information
                     ofs << " mag ";
                     context.set_context("double_w6_f2");
-                    context << atoms[it].mag[ia];
+                    context << atom_mulliken[nat_tmp][1];
                     ofs << context.str();
                 }
-                else if (GlobalV::NSPIN == 4)
+                else if (GlobalV::NSPIN == 4 && GlobalV::out_mul)
                 {
                     // output magnetic information
                     ofs << " mag ";
-                    context.set_context("vector3d");
-                    context << atoms[it].m_loc_[ia].x << " " << atoms[it].m_loc_[ia].y << " " << atoms[it].m_loc_[ia].z;
-                    ofs << context.str();
+					ofs << std::fixed << std::setprecision(5);
+                    ofs << std::setw(8) << atom_mulliken[nat_tmp][1] 
+						<< std::setw(8) << atom_mulliken[nat_tmp][2] 
+						<< std::setw(8) << atom_mulliken[nat_tmp][3];
                 }
                 ofs << std::endl;
+				nat_tmp++;
 			}
 		}
 	}
 	else if(type==2)
 	{
+		int nat_tmp = 0;
 		ofs << "Direct" << std::endl;
 		for(int it=0; it<ntype; it++)
 		{
@@ -1075,23 +1079,25 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int
                     ofs << context.str();
                 }
 
-                if (GlobalV::NSPIN == 2)
+                if (GlobalV::NSPIN == 2 && GlobalV::out_mul)
                 {
                     // output magnetic information
                     ofs << " mag ";
                     context.set_context("double_w6_f2");
-                    context << atoms[it].mag[ia];
+                    context << atom_mulliken[nat_tmp][1];
                     ofs << context.str();
                 }
-                else if (GlobalV::NSPIN == 4)
+                else if (GlobalV::NSPIN == 4 && GlobalV::out_mul)
                 {
                     // output magnetic information
                     ofs << " mag ";
-                    context.set_context("vector3d");
-                    context << atoms[it].m_loc_[ia].x << " " << atoms[it].m_loc_[ia].y << " " << atoms[it].m_loc_[ia].z;
-                    ofs << context.str();
+					ofs << std::fixed << std::setprecision(5);
+                    ofs << std::setw(8) << atom_mulliken[nat_tmp][1] 
+						<< std::setw(8) << atom_mulliken[nat_tmp][2] 
+						<< std::setw(8) << atom_mulliken[nat_tmp][3];
                 }
                 ofs << std::endl;
+				nat_tmp++;
 			}
 		}
 	}

diff --git a/source/module_cell/unitcell.h b/source/module_cell/unitcell.h
@@ -23,7 +23,7 @@ class UnitCell
     Magnetism magnet;  // magnetism Yu Liu 2021-07-03
     void cal_ux();
     bool judge_parallel(double a[3],ModuleBase::Vector3<double> b);
-	double *atom_mag;
+    std::vector<std::vector<double>> atom_mulliken;  //[nat][nspin]
 	int n_mag_at;
 
     std::string& Coordinate = lat.Coordinate;

diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp
@@ -218,6 +218,7 @@ void ElecState::init_scf(const int istep, const ModuleBase::ComplexMatrix& struc
     if (istep == 0)
     {
         this->charge->init_rho(this->eferm, strucfac, this->bigpw->nbz, this->bigpw->bz);
+        this->charge->check_rho(); // check the rho
     }
 
     // renormalize the charge density

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
@@ -93,6 +93,12 @@ void ElecStateLCAO<std::complex<double>>::psiToRho(const psi::Psi<std::complex<d
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
 
     this->calculate_weights();
+
+// the calculations of dm, and dm -> rho are, technically, two separate functionalities, as we cannot
+// rule out the possibility that we may have a dm from other sources, such as read from file.
+// However, since we are not separating them now, I opt to add a flag to control how dm is obtained as of now
+if(!GlobalV::dm_to_rho)
+{
     this->calEBand();
 
     ModuleBase::GlobalFunc::NOTE("Calculate the density matrix.");
@@ -130,6 +136,7 @@ void ElecStateLCAO<std::complex<double>>::psiToRho(const psi::Psi<std::complex<d
             this->print_psi(psi);
         }
     }
+}
     // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25
     //this->loc->cal_dk_k(*this->lowf->gridt, this->wg, (*this->klist));
     for (int is = 0; is < GlobalV::NSPIN; is++)