Bugfix (pyscf#135)

* recover deleted branch * fixed a bug in rks hessian * cutlass repo * FetchContent->ExternalProject * resolve the dependency in cmake * add more unittest * flake8 * Do not compress with git clone * include dftd3 and dftd4 for CI * change PYTHONPATH * change import for dispersion * cleanup unit test * skip unit test for pyscf 2.5 * revert changes in examples
bytedance · Apr 7, 2024 · 2ad6750 · 2ad6750
1 parent d60babf
commit 2ad6750
Show file tree

Hide file tree

Showing 42 changed files with 919 additions and 418 deletions.
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
@@ -22,7 +22,9 @@ jobs:
     - name: Install dependencies
       run: |
         python3 -m pip install --upgrade pip
-        pip3 install flake8 pytest coverage gpu4pyscf-libxc-cuda11x pytest-cov
+        pip3 install flake8 pytest coverage gpu4pyscf-libxc-cuda11x pytest-cov dftd3 dftd4
+        pip3 install pyscf --upgrade
+        git config --global core.compression 9
     - name: Build GPU4PySCF
       run: |
         export CUDA_HOME=/usr/local/cuda
@@ -32,5 +34,5 @@ jobs:
     - name: Test with pytest
       run: |
         echo $GITHUB_WORKSPACE
-        export PYTHONPATH="${PYTHONPATH}:$GITHUB_WORKSPACE"
+        export PYTHONPATH="${PYTHONPATH}:$(pwd)"
         pytest --cov=$GITHUB_WORKSPACE
diff --git a/examples/01-h2o_dftd3.py b/examples/01-h2o_dftd3.py
@@ -42,6 +42,7 @@
 mf_GPU.grids.level = grids_level
 mf_GPU.conv_tol = scf_tol
 mf_GPU.max_cycle = max_scf_cycles
+mf_GPU.conv_tol_cpscf = 1e-6
 mf_GPU.screen_tol = screen_tol
 
 # Compute Energy
@@ -50,7 +51,9 @@
 print('DFT energy by GPU4PySCF')
 print(e_dft)
 print('DFT energy by Q-Chem')
-print(-76.4672557846) # reference from q-chem: -76.4672557846
+e_qchem = -76.4672557846
+print(e_qchem) # reference from q-chem: -76.4672557846
+print('Energy diff between Q-Chem and PySCF', e_dft - e_qchem)
 
 # Compute Gradient
 print('------------------ Gradient ----------------------------')
@@ -61,9 +64,11 @@
 print(g_dft)
 print('Gradient by Q-Chem')
 # reference from q-chem
-print(np.array([[0.0000000,   0.0030278,  -0.0030278],
+g_qchem = np.array([[0.0000000,   0.0030278,  -0.0030278],
         [-0.0000000,  -0.0000000,   0.0000000],
-        [-0.0023449,   0.0011724,   0.0011724]]).T)
+        [-0.0023449,   0.0011724,   0.0011724]]).T
+print(g_qchem)
+print('Gradient diff between Q-Chem and PySCF', np.linalg.norm(g_dft - g_qchem))
 
 # Compute Hessian
 print('------------------- Hessian -----------------------------')
@@ -81,3 +86,4 @@
 print('Diagonals entries of Mass-weighted Hessian by Q-Chem')
 hess_qchem = np.loadtxt('hess_qchem.txt')
 print(np.diag(hess_qchem))
+print('Hessian diff between Q-Chem and PySCF', np.linalg.norm(hess_qchem - h_dft))
diff --git a/examples/06-h2o_hf.py b/examples/06-h2o_hf.py
@@ -13,41 +13,38 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from pyscf import gto, scf, grad
+from pyscf import gto
+from gpu4pyscf import scf
 import numpy as np
+import cupy
 
 mol = gto.M(atom=
 '''
 O       0.0000000000    -0.0000000000     0.1174000000
 H      -0.7570000000    -0.0000000000    -0.4696000000
 H       0.7570000000     0.0000000000    -0.4696000000
 ''',
-basis='cc-pvtz',
+basis='def2-tzvpp',
+cart=1,
 verbose=4)
 
-mf = scf.hf.RHF(mol)
-mf.direct_scf_tol = 1e-14
-mf.conv_tol = 1e-12
-e_cpu = mf.kernel()
-
-cpu_gradient = grad.rhf.Gradients(mf)
-cpu_gradient.kernel()
-
-from gpu4pyscf import scf
-import gpu4pyscf
-
+# Calculation on GPU
 mf = scf.hf.RHF(mol)
 mf.direct_scf_tol = 1e-14
 mf.conv_tol = 1e-12
 e_gpu = mf.kernel()
-gpu_gradient = gpu4pyscf.grad.rhf.Gradients(mf)
+
+gpu_gradient = mf.nuc_grad_method()
 gpu_gradient.kernel()
 
+# Move the object to CPU
+mf = mf.to_cpu()
+e_cpu = mf.kernel()
+cpu_gradient = mf.nuc_grad_method()
+cpu_gradient.kernel()
 cpu_de = cpu_gradient.de
-print(cpu_de)
-print(gpu_gradient.de)
-
 cpu_de -= np.sum(cpu_de, axis=0)/mol.natm
+
 print('e diff = ', e_cpu - e_gpu)
 print('g diff = \n', cpu_de - gpu_gradient.de)
 

diff --git a/gpu4pyscf/df/df_jk.py b/gpu4pyscf/df/df_jk.py
@@ -20,7 +20,7 @@
 import cupy
 import numpy
 from cupy import cublas
-from pyscf import lib, scf, __config__
+from pyscf import lib, __config__
 from pyscf.scf import dhf
 from pyscf.df import df_jk, addons
 from gpu4pyscf.lib import logger
@@ -97,7 +97,7 @@ def _density_fit(mf, auxbasis=None, with_df=None, only_dfj=False):
         with_df.verbose = mf.verbose
         with_df.auxbasis = auxbasis
 
-    if isinstance(mf, df_jk._DFHF):
+    if isinstance(mf, _DFHF):
         if mf.with_df is None:
             mf.with_df = with_df
         elif getattr(mf.with_df, 'auxbasis', None) != auxbasis:
@@ -240,7 +240,6 @@ def get_veff(self, mol=None, dm=None, dm_last=None, vhf_last=0, hermi=1):
 
     def to_cpu(self):
         obj = self.undo_df().to_cpu().density_fit()
-        print(type(obj))
         return utils.to_cpu(self, obj)
 
 

diff --git a/gpu4pyscf/df/grad/rhf.py b/gpu4pyscf/df/grad/rhf.py
@@ -17,8 +17,7 @@
 import numpy
 import cupy
 from cupyx.scipy.linalg import solve_triangular
-from pyscf.df.grad import rhf
-from pyscf import lib, scf, gto
+from pyscf import scf
 from gpu4pyscf.df import int3c2e
 from gpu4pyscf.lib.cupy_helper import print_mem_info, tag_array, unpack_tril, contract, load_library, take_last2d
 from gpu4pyscf.grad import rhf as rhf_grad

diff --git a/gpu4pyscf/df/grad/uhf.py b/gpu4pyscf/df/grad/uhf.py
@@ -19,9 +19,8 @@
 from cupyx.scipy.linalg import solve_triangular
 from pyscf import scf
 from gpu4pyscf.df import int3c2e
-from gpu4pyscf.lib.cupy_helper import print_mem_info, tag_array, unpack_tril, contract, load_library, take_last2d
+from gpu4pyscf.lib.cupy_helper import tag_array, contract, load_library, take_last2d
 from gpu4pyscf.grad import uhf as uhf_grad
-from gpu4pyscf.grad.rhf import grad_elec
 from gpu4pyscf import __config__
 from gpu4pyscf.lib import logger
 
@@ -62,13 +61,13 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
         mo_coeff = cupy.asarray(mf_grad.base.mo_coeff)
     if mo_occ is None:
         mo_occ = cupy.asarray(mf_grad.base.mo_occ)
-    sph_ao_idx = intopt.sph_ao_idx
-    dm = take_last2d(dm0, sph_ao_idx)
+    ao_idx = intopt.ao_idx
+    dm = take_last2d(dm0, ao_idx)
     if dm2 is not None:
-        dm2_tmp = take_last2d(dm2, sph_ao_idx)
+        dm2_tmp = take_last2d(dm2, ao_idx)
     # (L|ij) -> rhoj: (L), rhok: (L|oo)
     orbo = mo_coeff[:,mo_occ>0] * mo_occ[mo_occ>0] ** 0.5
-    orbo = orbo[sph_ao_idx, :]
+    orbo = orbo[ao_idx, :]
     nocc = orbo.shape[-1]
 
     # (L|ij) -> rhoj: (L), rhok: (L|oo)
@@ -109,8 +108,8 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
     else:
         int2c_e1 = auxmol.intor('int2c2e_ip1')
     int2c_e1 = cupy.asarray(int2c_e1)
-    sph_aux_idx = intopt.sph_aux_idx
-    rev_aux_idx = np.argsort(sph_aux_idx)
+    aux_ao_idx = intopt.aux_ao_idx
+    rev_aux_idx = np.argsort(aux_ao_idx)
     auxslices = auxmol.aoslice_by_atom()
     aux_cart2sph = intopt.aux_cart2sph
     low_t = low.T.copy()
@@ -123,8 +122,13 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
             rhoj = solve_triangular(low_t, rhoj, lower=False, overwrite_b=True)
             if dm2 is not None:
                 rhoj2 = solve_triangular(low_t, rhoj2, lower=False, overwrite_b=True)
-        rhoj_cart = contract('pq,q->p', aux_cart2sph, rhoj)
+        if not auxmol.cart:
+            rhoj_cart = contract('pq,q->p', aux_cart2sph, rhoj)
+        else:
+            rhoj_cart = rhoj
+
         rhoj = rhoj[rev_aux_idx]
+
         if dm2 is not None:
             rhoj2 = rhoj2[rev_aux_idx]
         tmp = contract('xpq,q->xp', int2c_e1, rhoj)
@@ -144,7 +148,10 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
         vkaux = -contract('xpq,pq->xp', int2c_e1, tmp)
         vkaux_2c = cupy.array([-vkaux[:,p0:p1].sum(axis=1) for p0, p1 in auxslices[:,2:]])
         vkaux = tmp = None
-        rhok_cart = contract('pq,qkl->pkl', aux_cart2sph, rhok)
+        if not auxmol.cart:
+            rhok_cart = contract('pq,qkl->pkl', aux_cart2sph, rhok)
+        else:
+            rhok_cart = rhok
         rhok = None
     low_t = None
     t0 = log.timer_debug1('rhoj and rhok', *t0)
@@ -156,15 +163,21 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
     # rebuild with aosym
     intopt.build(mf.direct_scf_tol, diag_block_with_triu=True, aosym=False,
                  group_size_aux=block_size)#, group_size=block_size)
-
-    # sph2cart for ao
-    cart2sph = intopt.cart2sph
-    orbo_cart = cart2sph @ orbo
-    if dm2 is None:
-        dm_cart = cart2sph @ dm @ cart2sph.T
+    if not intopt._mol.cart:
+        # sph2cart for ao
+        cart2sph = intopt.cart2sph
+        orbo_cart = cart2sph @ orbo
+        if dm2 is None:
+            dm_cart = cart2sph @ dm @ cart2sph.T
+        else:
+            dm2_tmp = take_last2d(dm2, ao_idx)
+            dm_cart = cart2sph @ dm2_tmp @ cart2sph.T
     else:
-        dm2_tmp = take_last2d(dm2, sph_ao_idx)
-        dm_cart = cart2sph @ dm2_tmp @ cart2sph.T
+        if dm2 is None:
+            dm_cart = dm
+        else:
+            dm_cart = take_last2d(dm2, ao_idx)
+        orbo_cart = orbo
     dm = orbo = None
 
     vj = vk = rhoj_tmp = rhok_tmp = None
@@ -272,10 +285,12 @@ def get_k(self, mol=None, dm=None, hermi=0, mo_coeff=None, mo_occ=None, dm2 = No
 
 
     def get_veff(self, mol=None, dm=None):
-        vj0, vk0, vjaux0, vkaux0 = self.get_jk(mol, dm[0], mo_coeff=self.base.mo_coeff[0], mo_occ=self.base.mo_occ[0])
-        vj1, vk1, vjaux1, vkaux1 = self.get_jk(mol, dm[1], mo_coeff=self.base.mo_coeff[1], mo_occ=self.base.mo_occ[1])
-        vj0_m1, vjaux0_m1 = self.get_j(mol, dm[0], mo_coeff=self.base.mo_coeff[0], mo_occ=self.base.mo_occ[0], dm2=dm[1])
-        vj1_m0, vjaux1_m0 = self.get_j(mol, dm[1], mo_coeff=self.base.mo_coeff[1], mo_occ=self.base.mo_occ[1], dm2=dm[0])
+        mo_a, mo_b = self.base.mo_coeff
+        mo_occa, mo_occb = self.base.mo_occ
+        vj0, vk0, vjaux0, vkaux0 = self.get_jk(mol, dm[0], mo_coeff=mo_a, mo_occ=mo_occa)
+        vj1, vk1, vjaux1, vkaux1 = self.get_jk(mol, dm[1], mo_coeff=mo_b, mo_occ=mo_occb)
+        vj0_m1, vjaux0_m1 = self.get_j(mol, dm[0], mo_coeff=mo_a, mo_occ=mo_occa, dm2=dm[1])
+        vj1_m0, vjaux1_m0 = self.get_j(mol, dm[1], mo_coeff=mo_b, mo_occ=mo_occb, dm2=dm[0])
         vhf = vj0 + vj1 + vj0_m1 + vj1_m0 - vk0 - vk1
         if self.auxbasis_response:
             e1_aux = vjaux0 + vjaux1 + vjaux0_m1 + vjaux1_m0 - vkaux0 - vkaux1

diff --git a/gpu4pyscf/df/grad/uks.py b/gpu4pyscf/df/grad/uks.py
@@ -91,20 +91,23 @@ def get_veff(ks_grad, mol=None, dm=None):
     vxc = cupy.asarray(vxc)
 
     if not ni.libxc.is_hybrid_xc(mf.xc):
-        vj0, vjaux0 = ks_grad.get_j(mol, dm[0], mo_coeff=ks_grad.base.mo_coeff[0], mo_occ=ks_grad.base.mo_occ[0])
-        vj1, vjaux1 = ks_grad.get_j(mol, dm[1], mo_coeff=ks_grad.base.mo_coeff[1], mo_occ=ks_grad.base.mo_occ[1])
-        vj0_m1, vjaux0_m1 = ks_grad.get_j(mol, dm[0], mo_coeff=ks_grad.base.mo_coeff[0], mo_occ=ks_grad.base.mo_occ[0], dm2=dm[1])
-        vj1_m0, vjaux1_m0 = ks_grad.get_j(mol, dm[1], mo_coeff=ks_grad.base.mo_coeff[1], mo_occ=ks_grad.base.mo_occ[1], dm2=dm[0])
+        mo_a, mo_b = ks_grad.base.mo_coeff
+        mo_occa, mo_occb = ks_grad.base.mo_occ
+        vj0, vjaux0 = ks_grad.get_j(mol, dm[0], mo_coeff=mo_a, mo_occ=mo_occa)
+        vj1, vjaux1 = ks_grad.get_j(mol, dm[1], mo_coeff=mo_b, mo_occ=mo_occb)
+        vj0_m1, vjaux0_m1 = ks_grad.get_j(mol, dm[0], mo_coeff=mo_a, mo_occ=mo_occa, dm2=dm[1])
+        vj1_m0, vjaux1_m0 = ks_grad.get_j(mol, dm[1], mo_coeff=mo_b, mo_occ=mo_occb, dm2=dm[0])
         if ks_grad.auxbasis_response:
             e1_aux = vjaux0 + vjaux1 + vjaux0_m1 + vjaux1_m0
         vxc += vj0 + vj1 + vj0_m1 + vj1_m0
     else:
         omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin)
-
-        vj0, vk0, vjaux0, vkaux0 = ks_grad.get_jk(mol, dm[0], mo_coeff=ks_grad.base.mo_coeff[0], mo_occ=ks_grad.base.mo_occ[0])
-        vj1, vk1, vjaux1, vkaux1 = ks_grad.get_jk(mol, dm[1], mo_coeff=ks_grad.base.mo_coeff[1], mo_occ=ks_grad.base.mo_occ[1])
-        vj0_m1, vjaux0_m1 = ks_grad.get_j(mol, dm[0], mo_coeff=ks_grad.base.mo_coeff[0], mo_occ=ks_grad.base.mo_occ[0], dm2=dm[1])
-        vj1_m0, vjaux1_m0 = ks_grad.get_j(mol, dm[1], mo_coeff=ks_grad.base.mo_coeff[1], mo_occ=ks_grad.base.mo_occ[1], dm2=dm[0])
+        mo_a, mo_b = ks_grad.base.mo_coeff
+        mo_occa, mo_occb = ks_grad.base.mo_occ
+        vj0, vk0, vjaux0, vkaux0 = ks_grad.get_jk(mol, dm[0], mo_coeff=mo_a, mo_occ=mo_occa)
+        vj1, vk1, vjaux1, vkaux1 = ks_grad.get_jk(mol, dm[1], mo_coeff=mo_b, mo_occ=mo_occb)
+        vj0_m1, vjaux0_m1 = ks_grad.get_j(mol, dm[0], mo_coeff=mo_a, mo_occ=mo_occa, dm2=dm[1])
+        vj1_m0, vjaux1_m0 = ks_grad.get_j(mol, dm[1], mo_coeff=mo_b, mo_occ=mo_occb, dm2=dm[0])
         vj = vj0 + vj1 + vj0_m1 + vj1_m0
         vk = (vk0 + vk1) * hyb
         if ks_grad.auxbasis_response:

diff --git a/gpu4pyscf/df/hessian/tests/test_df_uhf_hessian.py b/gpu4pyscf/df/hessian/tests/test_df_uhf_hessian.py
@@ -16,13 +16,11 @@
 import unittest
 import numpy
 import cupy
-from pyscf import gto, scf, lib
-from pyscf import grad, hessian
+from pyscf import gto, scf
 from pyscf.df.hessian import uhf as df_uhf_cpu
 from pyscf.hessian import uhf as uhf_cpu
 from gpu4pyscf.df.hessian import uhf as df_uhf_gpu
 from gpu4pyscf.hessian import uhf as uhf_gpu
-from gpu4pyscf import scf as scf_gpu
 
 def setUpModule():
     global mol

diff --git a/gpu4pyscf/df/hessian/tests/test_df_uks_hessian.py b/gpu4pyscf/df/hessian/tests/test_df_uks_hessian.py
@@ -15,11 +15,7 @@
 
 import unittest
 import numpy
-import cupy
-from pyscf import gto, scf, lib, dft
-from pyscf import grad, hessian
-from pyscf.df.hessian import uhf as df_uks_cpu
-from gpu4pyscf.df.hessian import uks as df_uks_gpu
+from pyscf import gto, dft
 
 def setUpModule():
     global mol

diff --git a/gpu4pyscf/df/tests/test_df_ecp.py b/gpu4pyscf/df/tests/test_df_ecp.py
@@ -16,11 +16,8 @@
 import unittest
 import numpy as np
 import pyscf
-from pyscf import lib
 from gpu4pyscf.dft import rks
 
-lib.num_threads(8)
-
 xc='pbe0'
 bas='def2-tzvpp'
 auxbasis='def2-tzvpp-jkfit'