diff --git a/gpu4pyscf/dft/tests/test_uks.py b/gpu4pyscf/dft/tests/test_uks.py
new file mode 100644
index 00000000..afe51281
--- /dev/null
+++ b/gpu4pyscf/dft/tests/test_uks.py
@@ -0,0 +1,95 @@
+# gpu4pyscf is a plugin to use Nvidia GPU in PySCF package
+#
+# Copyright (C) 2022 Qiming Sun
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy as np
+import unittest
+import pyscf
+from gpu4pyscf.dft import uks
+
+atom = '''
+O 0.0000000000 -0.0000000000 0.1174000000
+H -0.7570000000 -0.0000000000 -0.4696000000
+H 0.7570000000 0.0000000000 -0.4696000000
+'''
+bas='def2-qzvpp'
+grids_level = 3
+nlcgrids_level = 1
+
+def setUpModule():
+ global mol
+ mol = pyscf.M(
+ atom=atom,
+ basis=bas,
+ max_memory=32000,
+ verbose = 1,
+ spin = 1,
+ charge = 1,
+ output = '/dev/null'
+ )
+
+def tearDownModule():
+ global mol
+ mol.stdout.close()
+ del mol
+
+def run_dft(xc):
+ mf = uks.UKS(mol, xc=xc)
+ mf.grids.level = grids_level
+ mf.nlcgrids.level = nlcgrids_level
+ e_dft = mf.kernel()
+ return e_dft
+
+class KnownValues(unittest.TestCase):
+ '''
+ known values are obtained by pyscf
+ '''
+ def test_uks_lda(self):
+ print('------- LDA ----------------')
+ e_tot = run_dft("LDA, vwn5")
+ assert np.allclose(e_tot, -75.42821982483972)
+
+ def test_uks_pbe(self):
+ print('------- PBE ----------------')
+ e_tot = run_dft('PBE')
+ assert np.allclose(e_tot, -75.91732813416843)
+
+ def test_uks_b3lyp(self):
+ print('-------- B3LYP -------------')
+ e_tot = run_dft('B3LYP')
+ assert np.allclose(e_tot, -76.00306439862237)
+
+ def test_uks_m06(self):
+ print('--------- M06 --------------')
+ e_tot = run_dft("M06")
+ assert np.allclose(e_tot, -75.96551006522827)
+
+ def test_uks_wb97(self):
+ print('-------- wB97 --------------')
+ e_tot = run_dft("HYB_GGA_XC_WB97")
+ assert np.allclose(e_tot, -75.987601337562)
+
+ def test_uks_vv10(self):
+ print("------- wB97m-v -------------")
+ e_tot = run_dft('HYB_MGGA_XC_WB97M_V')
+ assert np.allclose(e_tot, -75.97363094678428)
+
+ #TODO: add test cases for D3/D4 and gradient
+
+if __name__ == "__main__":
+ print("Full Tests for dft")
+ unittest.main()
+
diff --git a/gpu4pyscf/dft/uks.py b/gpu4pyscf/dft/uks.py
index a3e48cb4..d56f067f 100644
--- a/gpu4pyscf/dft/uks.py
+++ b/gpu4pyscf/dft/uks.py
@@ -15,16 +15,121 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+import cupy
from pyscf.dft import uks
-from gpu4pyscf.dft import numint
-from gpu4pyscf.scf.uhf import UHF
+from pyscf import lib
+from gpu4pyscf import scf
+from gpu4pyscf.lib import logger
+from gpu4pyscf.dft import numint, gen_grid, rks
+from gpu4pyscf.lib.cupy_helper import tag_array
-class UKS(uks.UKS):
+
+def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1):
+ '''Coulomb + XC functional for UKS. See pyscf/dft/rks.py
+ :func:`get_veff` fore more details.
+ '''
+ if mol is None: mol = ks.mol
+ if dm is None: dm = ks.make_rdm1()
+ t0 = logger.init_timer(ks)
+ rks.initialize_grids(ks, mol, dm)
+
+ if hasattr(ks, 'screen_tol') and ks.screen_tol is not None:
+ ks.direct_scf_tol = ks.screen_tol
+ ground_state = (isinstance(dm, cupy.ndarray) and dm.ndim == 3)
+
+ ni = ks._numint
+ if hermi == 2: # because rho = 0
+ n, exc, vxc = (0,0), 0, 0
+ else:
+ max_memory = ks.max_memory - lib.current_memory()[0]
+ n, exc, vxc = ni.nr_uks(mol, ks.grids, ks.xc, dm, max_memory=max_memory)
+ logger.debug(ks, 'nelec by numeric integration = %s', n)
+ if ks.nlc or ni.libxc.is_nlc(ks.xc):
+ if ni.libxc.is_nlc(ks.xc):
+ xc = ks.xc
+ else:
+ assert ni.libxc.is_nlc(ks.nlc)
+ xc = ks.nlc
+ n, enlc, vnlc = ni.nr_nlc_vxc(mol, ks.nlcgrids, xc, dm[0]+dm[1],
+ max_memory=max_memory)
+ exc += enlc
+ vxc += vnlc
+ logger.debug(ks, 'nelec with nlc grids = %s', n)
+ t0 = logger.timer(ks, 'vxc', *t0)
+
+ if not ni.libxc.is_hybrid_xc(ks.xc):
+ vk = None
+ if (ks._eri is None and ks.direct_scf and
+ getattr(vhf_last, 'vj', None) is not None):
+ ddm = cupy.asarray(dm) - cupy.asarray(dm_last)
+ vj = ks.get_j(mol, ddm[0]+ddm[1], hermi)
+ vj += vhf_last.vj
+ else:
+ vj = ks.get_j(mol, dm[0]+dm[1], hermi)
+ vxc += vj
+ else:
+ omega, alpha, hyb = ni.rsh_and_hybrid_coeff(ks.xc, spin=mol.spin)
+ if (ks._eri is None and ks.direct_scf and
+ getattr(vhf_last, 'vk', None) is not None):
+ ddm = cupy.asarray(dm) - cupy.asarray(dm_last)
+ vj, vk = ks.get_jk(mol, ddm, hermi)
+ vk *= hyb
+ if abs(omega) > 1e-10: # For range separated Coulomb operator
+ vklr = ks.get_k(mol, ddm, hermi, omega)
+ vklr *= (alpha - hyb)
+ vk += vklr
+ vj = vj[0] + vj[1] + vhf_last.vj
+ vk += vhf_last.vk
+ else:
+ vj, vk = ks.get_jk(mol, dm, hermi)
+ vj = vj[0] + vj[1]
+ vk *= hyb
+ if abs(omega) > 1e-10:
+ vklr = ks.get_k(mol, dm, hermi, omega=omega)
+ vklr *= (alpha - hyb)
+ vk += vklr
+ vxc += vj - vk
+
+ if ground_state:
+ exc -=(cupy.einsum('ij,ji', dm[0], vk[0]).real +
+ cupy.einsum('ij,ji', dm[1], vk[1]).real) * .5
+ if ground_state:
+ ecoul = cupy.einsum('ij,ji', dm[0]+dm[1], vj).real * .5
+ else:
+ ecoul = None
+ t0 = logger.timer_debug1(ks, 'jk total', *t0)
+ vxc = tag_array(vxc, ecoul=ecoul, exc=exc, vj=vj, vk=vk)
+ return vxc
+
+
+def energy_elec(ks, dm=None, h1e=None, vhf=None):
+ if dm is None: dm = ks.make_rdm1()
+ if h1e is None: h1e = ks.get_hcore()
+ if vhf is None or getattr(vhf, 'ecoul', None) is None:
+ vhf = ks.get_veff(ks.mol, dm)
+ if not (isinstance(dm, cupy.ndarray) and dm.ndim == 2):
+ dm = dm[0] + dm[1]
+ return rks.energy_elec(ks, dm, h1e, vhf)
+
+
+class UKS(scf.uhf.UHF, uks.UKS):
from gpu4pyscf.lib.utils import to_cpu, to_gpu, device
+ _keys = {'disp', 'screen_tol'}
- def __init__(self, mol, xc='LDA,VWN'):
+ def __init__(self, mol, xc='LDA,VWN', disp=None):
super().__init__(mol, xc)
+ self.disp = disp
self._numint = numint.NumInt()
+ self.screen_tol = 1e-14
+
+ grids_level = self.grids.level
+ self.grids = gen_grid.Grids(mol)
+ self.grids.level = grids_level
- get_jk = UHF.get_jk
- _eigh = UHF._eigh
+ nlcgrids_level = self.nlcgrids.level
+ self.nlcgrids = gen_grid.Grids(mol)
+ self.nlcgrids.level = nlcgrids_level
+
+ energy_elec = energy_elec
+ get_veff = get_veff
+
diff --git a/gpu4pyscf/scf/diis.py b/gpu4pyscf/scf/diis.py
index b1e4c1f7..c01d4972 100644
--- a/gpu4pyscf/scf/diis.py
+++ b/gpu4pyscf/scf/diis.py
@@ -15,7 +15,7 @@
#
# Author: Qiming Sun
#
-# modified by Xiaojie Wu
+# modified by Xiaojie Wu ; Zhichen Pu
"""
DIIS
@@ -64,9 +64,13 @@ def get_num_vec(self):
def get_err_vec(s, d, f):
'''error vector = SDF - FDS'''
- if isinstance(f, cupy.ndarray):
+ if isinstance(f, cupy.ndarray) and f.ndim == 2:
sdf = reduce(cupy.dot, (s,d,f))
- errvec = (sdf.T.conj() - sdf)
- return errvec
+ errvec = (sdf.conj().T - sdf).ravel()
+ elif f.ndim == s.ndim+1 and f.shape[0] == 2: # for UHF
+ errvec = cupy.hstack([
+ get_err_vec(s, d[0], f[0]).ravel(),
+ get_err_vec(s, d[1], f[1]).ravel()])
else:
- cpu_diis.get_err_vec(s, d, f)
+ raise RuntimeError('Unknown SCF DIIS type')
+ return errvec
diff --git a/gpu4pyscf/scf/tests/test_uhf.py b/gpu4pyscf/scf/tests/test_uhf.py
new file mode 100644
index 00000000..0fe8eb7b
--- /dev/null
+++ b/gpu4pyscf/scf/tests/test_uhf.py
@@ -0,0 +1,255 @@
+# gpu4pyscf is a plugin to use Nvidia GPU in PySCF package
+#
+# Copyright (C) 2022 Qiming Sun
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import unittest
+import numpy as np
+import cupy
+import pyscf
+from pyscf import lib
+from gpu4pyscf import scf
+
+mol = pyscf.M(
+ atom='''
+C -0.65830719, 0.61123287, -0.00800148
+C 0.73685281, 0.61123287, -0.00800148
+C 1.43439081, 1.81898387, -0.00800148
+C 0.73673681, 3.02749287, -0.00920048
+''',
+ basis='ccpvtz',
+ charge=1,
+ spin=1,
+ output = '/dev/null'
+)
+
+mol1 = pyscf.M(
+ atom='''
+C -1.20806619, -0.34108413, -0.00755148
+C 1.28636081, -0.34128013, -0.00668648
+H 2.53407081, 1.81906387, -0.00736748
+H 1.28693681, 3.97963587, -0.00925948
+''',
+ basis='''unc
+#BASIS SET:
+H S
+ 1.815041 1
+ 0.591063 1
+H P
+ 2.305000 1
+#BASIS SET:
+C S
+ 8.383976 1
+ 3.577015 1
+ 1.547118 1
+H P
+ 2.305000 1
+ 1.098827 1
+ 0.806750 1
+ 0.282362 1
+H D
+ 1.81900 1
+ 0.72760 1
+ 0.29104 1
+H F
+ 0.970109 1
+C G
+ 0.625000 1
+C H
+ 0.4 1
+ ''',
+ output = '/dev/null'
+)
+
+def tearDownModule():
+ global mol, mol1
+ mol.stdout.close()
+ mol1.stdout.close()
+ del mol, mol1
+
+class KnownValues(unittest.TestCase):
+ def test_get_jk(self):
+ np.random.seed(1)
+ nao = mol.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol)
+ vj, vk = mf.get_jk(mol, dm)
+ self.assertAlmostEqual(lib.fp(vj), -1782.4478082102428, 7)
+ self.assertAlmostEqual(lib.fp(vk), -280.36548013781095, 7)
+ mf1 = mf.to_cpu()
+ refj, refk = mf1.get_jk(mol, dm)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 7)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 7)
+ with lib.temporary_env(mol, cart=True):
+ np.random.seed(1)
+ nao = mol.nao
+ dm = np.random.random((2, nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol)
+ vj, vk = mf.get_jk(mol, dm)
+ self.assertAlmostEqual(lib.fp(vj), -1790.0063863999496, 7)
+ self.assertAlmostEqual(lib.fp(vk), -8.969890703683895 , 7)
+
+ mf1 = mf.to_cpu()
+ refj, refk = mf1.get_jk(mol, dm)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 7)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 7)
+
+ def test_get_j(self):
+ np.random.seed(1)
+ nao = mol.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol)
+ vj = mf.get_j(mol, dm)
+ self.assertAlmostEqual(lib.fp(vj), -1782.4478082102423 , 7)
+
+ mf1 = mf.to_cpu()
+ refj = mf1.get_j(mol, dm)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 7)
+
+ with lib.temporary_env(mol, cart=True):
+ np.random.seed(1)
+ nao = mol.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol)
+ vj = mf.get_j(mol, dm)
+ self.assertAlmostEqual(lib.fp(vj), -1790.0063863999503, 7)
+
+ mf1 = mf.to_cpu()
+ refj = mf1.get_j(mol, dm)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 7)
+
+ def test_get_k(self):
+ np.random.seed(1)
+ nao = mol.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol)
+ vk = mf.get_k(mol, dm)
+ self.assertAlmostEqual(lib.fp(vk), -280.36548013781083, 7)
+
+ mf1 = mf.to_cpu()
+ refk = mf1.get_k(mol, dm)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 7)
+
+ with lib.temporary_env(mol, cart=True):
+ np.random.seed(1)
+ nao = mol.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol)
+ vk = mf.get_k(mol, dm)
+ self.assertAlmostEqual(lib.fp(vk), -8.969890703691519 , 7)
+
+ mf1 = mf.to_cpu()
+ refk = mf1.get_k(mol, dm)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 7)
+
+ def test_get_jk1(self):
+ # test l >= 4
+ np.random.seed(1)
+ nao = mol1.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol1)
+ vj, vk = mf.get_jk(mol1, dm, hermi=1)
+ self.assertAlmostEqual(lib.fp(vj), 179.14526555374763, 7)
+ self.assertAlmostEqual(lib.fp(vk), -34.851182918653606, 7)
+
+ mf1 = mf.to_cpu()
+ refj, refk = mf1.get_jk(mol1, dm, hermi=1)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 8)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 8)
+
+ @unittest.skip('hermi=0')
+ def test_get_jk1_hermi0(self):
+ np.random.seed(1)
+ nao = mol1.nao
+ dm = np.random.random((2,nao,nao))
+ mf = scf.UHF(mol1)
+ vj, vk = mf.get_jk(mol1, cupy.asarray(dm), hermi=0)
+ self.assertAlmostEqual(lib.fp(vj.get()), 89.57263277687345 , 7)
+ self.assertAlmostEqual(lib.fp(vk.get()),-26.369697697245883, 7)
+
+ mf1 = mf.to_cpu()
+ refj, refk = mf1.get_jk(mol1, dm, hermi=0)
+ self.assertAlmostEqual(abs(vj.get() - refj).max(), 0, 8)
+ self.assertAlmostEqual(abs(vk.get() - refk).max(), 0, 8)
+
+ def test_get_j1(self):
+ # test l >= 4
+ np.random.seed(1)
+ nao = mol1.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol1)
+ vj = mf.get_j(mol1, dm, hermi=1)
+ self.assertAlmostEqual(lib.fp(vj), 179.14526555374712, 7)
+
+ mf1 = mf.to_cpu()
+ refj = mf1.get_j(mol1, dm, hermi=1)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 7)
+
+ @unittest.skip('hermi=0')
+ def test_get_j1_hermi0(self):
+ np.random.seed(1)
+ nao = mol1.nao
+ dm = np.random.random((2,nao,nao))
+ mf = scf.UHF(mol1)
+ vj = mf.get_j(mol1, dm, hermi=0).get()
+ self.assertAlmostEqual(lib.fp(vj), 89.5726327768736, 7)
+
+ mf1 = mf.to_cpu()
+ refj = mf1.get_j(mol1, dm, hermi=0)
+ self.assertAlmostEqual(abs(vj - refj).max(), 0, 7)
+
+ def test_get_k1(self):
+ # test l >= 4
+ np.random.seed(1)
+ nao = mol1.nao
+ dm = np.random.random((2,nao,nao))
+ dm = dm + dm.transpose(0,2,1)
+ mf = scf.UHF(mol1)
+ vk = mf.get_k(mol1, dm, hermi=1)
+ self.assertAlmostEqual(lib.fp(vk), -34.85118291865315, 7)
+
+ mf1 = mf.to_cpu()
+ refk = mf1.get_k(mol1, dm, hermi=1)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 7)
+
+ @unittest.skip('hermi=0')
+ def test_get_k1_hermi0(self):
+ np.random.seed(1)
+ nao = mol1.nao
+ dm = np.random.random((2,nao,nao))
+ mf = scf.UHF(mol1)
+ vk = mf.get_k(mol1, dm, hermi=0).get()
+ self.assertAlmostEqual(lib.fp(vk),-26.369697697246007, 7)
+
+ mf1 = mf.to_cpu()
+ refk = mf1.get_k(mol1, dm, hermi=0)
+ self.assertAlmostEqual(abs(vk - refk).max(), 0, 7)
+
+ # end to end test
+ def test_uhf_scf(self):
+ e_tot = scf.UHF(mol).kernel()
+ self.assertAlmostEqual(e_tot, -150.76441654065087)
+
+if __name__ == "__main__":
+ print("Full Tests for UHF")
+ unittest.main()
diff --git a/gpu4pyscf/scf/uhf.py b/gpu4pyscf/scf/uhf.py
index 9566cf0b..d606d7dd 100644
--- a/gpu4pyscf/scf/uhf.py
+++ b/gpu4pyscf/scf/uhf.py
@@ -15,11 +15,161 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+
+from functools import reduce
from pyscf.scf import uhf
-from gpu4pyscf.scf.hf import _get_jk, eigh
+from gpu4pyscf.scf.hf import _get_jk, eigh, damping, level_shift, _kernel
+from gpu4pyscf.lib import logger
+from gpu4pyscf.lib.cupy_helper import tag_array
+import numpy as np
+import cupy
+from gpu4pyscf import lib
+from gpu4pyscf.scf import diis
+from pyscf import lib as pyscf_lib
+
+
+def make_rdm1(mo_coeff, mo_occ, **kwargs):
+ '''One-particle density matrix in AO representation
+
+ Args:
+ mo_coeff : tuple of 2D ndarrays
+ Orbital coefficients for alpha and beta spins. Each column is one orbital.
+ mo_occ : tuple of 1D ndarrays
+ Occupancies for alpha and beta spins.
+ Returns:
+ A list of 2D ndarrays for alpha and beta spins
+ '''
+ mo_a = mo_coeff[0]
+ mo_b = mo_coeff[1]
+ dm_a = cupy.dot(mo_a*mo_occ[0], mo_a.conj().T)
+ dm_b = cupy.dot(mo_b*mo_occ[1], mo_b.conj().T)
+# DO NOT make tag_array for DM here because the DM arrays may be modified and
+# passed to functions like get_jk, get_vxc. These functions may take the tags
+# (mo_coeff, mo_occ) to compute the potential if tags were found in the DM
+# arrays and modifications to DM arrays may be ignored.
+ return tag_array((dm_a, dm_b), mo_coeff=mo_coeff, mo_occ=mo_occ)
+
+
+def spin_square(mo, s=1):
+ r'''Spin square and multiplicity of UHF determinant
+
+ Detailed derivataion please refers to the cpu pyscf.
+
+ '''
+ mo_a, mo_b = mo
+ nocc_a = mo_a.shape[1]
+ nocc_b = mo_b.shape[1]
+ s = reduce(cupy.dot, (mo_a.conj().T, cupy.asarray(s), mo_b))
+ ssxy = (nocc_a+nocc_b) * .5 - cupy.einsum('ij,ij->', s.conj(), s)
+ ssz = (nocc_b-nocc_a)**2 * .25
+ ss = (ssxy + ssz).real
+ s = cupy.sqrt(ss+.25) - .5
+ return ss, s*2+1
+
+
+def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None,
+ diis_start_cycle=None, level_shift_factor=None, damp_factor=None):
+ if h1e is None: h1e = cupy.asarray(mf.get_hcore())
+ if vhf is None: vhf = mf.get_veff(mf.mol, dm)
+ f = h1e + vhf
+ if f.ndim == 2:
+ f = (f, f)
+ if cycle < 0 and diis is None: # Not inside the SCF iteration
+ return f
+
+ if diis_start_cycle is None:
+ diis_start_cycle = mf.diis_start_cycle
+ if level_shift_factor is None:
+ level_shift_factor = mf.level_shift
+ if damp_factor is None:
+ damp_factor = mf.damp
+ if s1e is None: s1e = mf.get_ovlp()
+ if dm is None: dm = mf.make_rdm1()
+
+ if isinstance(level_shift_factor, (tuple, list, np.ndarray)):
+ shifta, shiftb = level_shift_factor
+ else:
+ shifta = shiftb = level_shift_factor
+ if isinstance(damp_factor, (tuple, list, np.ndarray)):
+ dampa, dampb = damp_factor
+ else:
+ dampa = dampb = damp_factor
+
+ if 0 <= cycle < diis_start_cycle-1 and abs(dampa)+abs(dampb) > 1e-4:
+ f = (damping(s1e, dm[0], f[0], dampa),
+ damping(s1e, dm[1], f[1], dampb))
+ if diis and cycle >= diis_start_cycle:
+ f = diis.update(s1e, dm, f, mf, h1e, vhf)
+ if abs(shifta)+abs(shiftb) > 1e-4:
+ f = (level_shift(s1e, dm[0], f[0], shifta),
+ level_shift(s1e, dm[1], f[1], shiftb))
+ return f
+
class UHF(uhf.UHF):
from gpu4pyscf.lib.utils import to_cpu, to_gpu, device
+ DIIS = diis.SCF_DIIS
get_jk = _get_jk
_eigh = staticmethod(eigh)
+ get_fock = get_fock
+
+ def make_rdm1(self, mo_coeff=None, mo_occ=None, **kwargs):
+ if mo_coeff is None:
+ mo_coeff = self.mo_coeff
+ if mo_occ is None:
+ mo_occ = self.mo_occ
+ return make_rdm1(mo_coeff, mo_occ, **kwargs)
+
+ def eig(self, fock, s):
+ e_a, c_a = self._eigh(fock[0], s)
+ e_b, c_b = self._eigh(fock[1], s)
+ return cupy.array((e_a,e_b)), cupy.array((c_a,c_b))
+
+ def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1):
+ if mol is None: mol = self.mol
+ if dm is None: dm = self.make_rdm1()
+
+ if isinstance(dm, cupy.ndarray) and dm.ndim == 2:
+ dm = cupy.asarray((dm*.5,dm*.5))
+
+ if self._eri is not None or not self.direct_scf:
+ vj, vk = self.get_jk(mol, cupy.asarray(dm), hermi)
+ vhf = vj[0] + vj[1] - vk
+ else:
+ ddm = cupy.asarray(dm) - cupy.asarray(dm_last)
+ vj, vk = self.get_jk(mol, ddm, hermi)
+ vhf = vj[0] + vj[1] - vk
+ vhf += cupy.asarray(vhf_last)
+ return vhf
+
+ def scf(self, dm0=None, **kwargs):
+ cput0 = logger.init_timer(self)
+
+ self.dump_flags()
+ self.build(self.mol)
+
+ if self.max_cycle > 0 or self.mo_coeff is None:
+ self.converged, self.e_tot, \
+ self.mo_energy, self.mo_coeff, self.mo_occ = \
+ _kernel(self, self.conv_tol, self.conv_tol_grad,
+ dm0=dm0, callback=self.callback,
+ conv_check=self.conv_check, **kwargs)
+ else:
+ self.e_tot = _kernel(self, self.conv_tol, self.conv_tol_grad,
+ dm0=dm0, callback=self.callback,
+ conv_check=self.conv_check, **kwargs)[1]
+
+ logger.timer(self, 'SCF', *cput0)
+ self._finalize()
+ return self.e_tot
+ kernel = pyscf_lib.alias(scf, alias_name='kernel')
+
+ def spin_square(self, mo_coeff=None, s=None):
+ if mo_coeff is None:
+ mo_coeff = (self.mo_coeff[0][:,self.mo_occ[0]>0],
+ self.mo_coeff[1][:,self.mo_occ[1]>0])
+ if s is None:
+ s = self.get_ovlp()
+ return spin_square(mo_coeff, s)
+