Skip to content

Commit

Permalink
New Rys kernel (#221)
Browse files Browse the repository at this point in the history
* New Rys kernel

Improve tasks generation

Tune

Fix cupy_helper

Optimized cache Rpa

Adapt to scf code

q_cond double to float

do_j and do_k

cleanup and bugfix

Add LR ERI

Add SR ERI

Tune sr-eri

Rys ERI based j-engine

Tune J-engine

Task adjustment

Fix condense function

Add tests and examples for JK matrix evaluation

* Solve merging conflicts

* Missing import

* Fix import errors

* Conditionally compiling against CUDA_VERSION

* Restore CMakeLists.txt

* Missing code

* test name conflicts

* Fix various merging conflicts

* Update unittest configuration

* compilation for cuda<12.4

* Update the treatment of omega in get_jk

* Handle h functions

* Fix a gradients initialization bug

* Add assertion and comments

* Compiling issue for rys-j

* some code in legacy version

---------

Co-authored-by: Qiming Sun <[email protected]>
Co-authored-by: xiaojie.wu <[email protected]>
  • Loading branch information
3 people authored Oct 11, 2024
1 parent a98497f commit ed993be
Show file tree
Hide file tree
Showing 50 changed files with 83,896 additions and 847 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ jobs:
- name: Build GPU4PySCF
run: |
export CUDA_HOME=/usr/local/cuda
export PATH=${CUDA_HOME}/bin:${PATH}
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
export CMAKE_CONFIGURE_ARGS="-DBUILD_LIBXC=OFF -DCUDA_ARCHITECTURES=70-real -DBUILD_CUTLASS=ON"
sh build.sh
- name: Test with pytest
run: |
Expand Down
4 changes: 3 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#!/bin/bash

export CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
echo "PATH=${PATH}"
echo "CUDA_HOME=${CUDA_HOME}"
export PATH="$CUDA_HOME/bin:$PATH"
export PATH="${CUDA_HOME}/bin:$PATH"
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
python3 setup.py bdist_wheel
rm -rf output && mv dist output
CURRENT_PATH=`pwd`
Expand Down
41 changes: 41 additions & 0 deletions examples/23-j_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright 2024 The GPU4PySCF Authors. All Rights Reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

'''
Compute J and K matrices separately. The J matrix is evaluated using J-engine.
'''

import pyscf
from gpu4pyscf import scf
from gpu4pyscf.scf import jk

# Build the molecule used by this example: one O and two H atoms with the
# def2-tzvp basis set. verbose=5 is passed through to pyscf.M.
mol = pyscf.M(
atom = '''
O 0.0000000000 -0.0000000000 0.1174000000
H -0.7570000000 -0.0000000000 -0.4696000000
H 0.7570000000 0.0000000000 -0.4696000000
''',
basis='def2-tzvp',
verbose=5
)

def get_veff(self, mol, dm, *args, **kwargs):
    '''
    Return J - K, with the J and K matrices computed by separate calls.

    J comes from jk.get_j applied to dm[0] + dm[1]; K comes from
    jk.get_jk called with with_j=False. Extra positional and keyword
    arguments are accepted and ignored.

    NOTE(review): dm[0] and dm[1] are presumably the alpha and beta
    density matrices of a UHF calculation -- confirm against the caller.
    '''
    # Coulomb term from the sum of the first two density-matrix components.
    summed_dm = dm[0] + dm[1]
    coulomb = jk.get_j(mol, summed_dm)

    # with_j=False is requested, and the first returned value is discarded.
    _unused_j, exchange = jk.get_jk(mol, dm, with_j=False)

    return coulomb - exchange

# Replace UHF.get_veff on the gpu4pyscf class with the get_veff defined in
# this script, presumably so that the SCF run below uses it.
scf.uhf.UHF.get_veff = get_veff

# Create a UHF object from mol, convert it with to_gpu(), and run it.
mf = mol.UHF().to_gpu().run()
2 changes: 1 addition & 1 deletion gpu4pyscf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import lib, grad, hessian, solvent, scf, dft
#from . import lib, grad, hessian, solvent, scf, dft

__version__ = '1.0.2'

Expand Down
16 changes: 8 additions & 8 deletions gpu4pyscf/cc/ccsd_incore.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from pyscf.cc import ccsd
from pyscf.cc import _ccsd
from pyscf import __config__
from gpu4pyscf.scf import hf as gpu_hf
from gpu4pyscf.scf import int4c2e
from gpu4pyscf.lib.cupy_helper import load_library
from gpu4pyscf.lib import logger

Expand Down Expand Up @@ -171,7 +171,7 @@ def _direct_ovvv_vvvv(mycc, t1, t2):
((mem_avail-Ht2_mem)*.5/8/nao_cart**2)**.5)))
logger.debug1(mycc, 'blksize %d nao %d', blksize, nao_cart)

vhfopt = gpu_hf._VHFOpt(mycc.mol, 'int2e')
vhfopt = int4c2e._VHFOpt(mycc.mol, 'int2e')
vhfopt.build(group_size=blksize, diag_block_with_triu=True)
mol = vhfopt.mol

Expand Down Expand Up @@ -219,7 +219,7 @@ def contract_vvvv_(eri, i0, i1, j0, j1):
log_qs = vhfopt.log_qs
cp_idx, cp_jdx = np.tril_indices(len(vhfopt.uniq_l_ctr))

if vhfopt.uniq_l_ctr[:,0].max() <= gpu_hf.LMAX_ON_GPU:
if vhfopt.uniq_l_ctr[:,0].max() <= int4c2e.LMAX_ON_GPU:
# Computing ERIs on GPU
idx, idy = cupy.tril_indices(nao)
#eribuf = cupy.empty(blksize**2*nao**2)
Expand Down Expand Up @@ -279,7 +279,7 @@ def fint(ish0, ish1, jsh0, jsh1, group_id):
cpj = cp_jdx[cp_ij_id]
li = vhfopt.uniq_l_ctr[cpi,0]
lj = vhfopt.uniq_l_ctr[cpj,0]
if li > gpu_hf.LMAX_ON_GPU or lj > gpu_hf.LMAX_ON_GPU or log_q_ij.size == 0:
if li > int4c2e.LMAX_ON_GPU or lj > int4c2e.LMAX_ON_GPU or log_q_ij.size == 0:
continue

ish0 = l_ctr_offsets[cpi]
Expand Down Expand Up @@ -375,7 +375,7 @@ def _fill_eri_block(eri, strides, ao_offsets, vhfopt, group_id):
cpl = cp_jdx[cp_kl_id]
lk = vhfopt.uniq_l_ctr[cpk,0]
ll = vhfopt.uniq_l_ctr[cpl,0]
if lk > gpu_hf.LMAX_ON_GPU or ll > gpu_hf.LMAX_ON_GPU:
if lk > int4c2e.LMAX_ON_GPU or ll > int4c2e.LMAX_ON_GPU:
raise NotImplementedError

stream = cupy.cuda.get_current_stream()
Expand All @@ -394,7 +394,7 @@ def _fill_eri_block(eri, strides, ao_offsets, vhfopt, group_id):
cpj = cp_jdx[cp_ij_id]
li = vhfopt.uniq_l_ctr[cpi,0]
lj = vhfopt.uniq_l_ctr[cpj,0]
if li > gpu_hf.LMAX_ON_GPU or lj > gpu_hf.LMAX_ON_GPU or log_q_ij.size == 0:
if li > int4c2e.LMAX_ON_GPU or lj > int4c2e.LMAX_ON_GPU or log_q_ij.size == 0:
continue

t0 = time.perf_counter()
Expand Down Expand Up @@ -446,7 +446,7 @@ def _make_eris_incore(mycc, mo_coeff=None):
(mem_avail*.5/8/nao_cart**2)**.5)))
logger.debug1(mycc, 'blksize %d nao %d', blksize, nao_cart)

vhfopt = gpu_hf._VHFOpt(mycc.mol, 'int2e')
vhfopt = int4c2e._VHFOpt(mycc.mol, 'int2e')
vhfopt.build(group_size=blksize, diag_block_with_triu=True)
mol = vhfopt.mol
mo = vhfopt.coeff.dot(mo_coeff)
Expand All @@ -470,7 +470,7 @@ def _make_eris_incore(mycc, mo_coeff=None):
cpj = cp_jdx[cp_ij_id]
li = vhfopt.uniq_l_ctr[cpi,0]
lj = vhfopt.uniq_l_ctr[cpj,0]
if li > gpu_hf.LMAX_ON_GPU or lj > gpu_hf.LMAX_ON_GPU or log_q_ij.size == 0:
if li > int4c2e.LMAX_ON_GPU or lj > int4c2e.LMAX_ON_GPU or log_q_ij.size == 0:
continue

ish0 = l_ctr_offsets[cpi]
Expand Down
10 changes: 9 additions & 1 deletion gpu4pyscf/cc/tests/test_ccsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,12 @@ def test_to_gpu(self):

if __name__ == '__main__':
print("Full Tests for CCSD")
unittest.main()
#unittest.main()
mol = pyscf.M(atom = [
[8 , (0. , 0. , 0.)],
[1 , (0. , -0.757 , 0.587)],
[1 , (0. , 0.757 , 0.587)],
], basis = 'cc-pvdz', verbose=0)
mf = mol.RHF().run()
mcc = ccsd_incore.CCSD(mf)
eris = mcc.ao2mo()
10 changes: 4 additions & 6 deletions gpu4pyscf/df/int3c2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
import cupy
from pyscf import gto, df, lib
from pyscf.scf import _vhf
from gpu4pyscf.scf.hf import BasisProdCache, _make_s_index_offsets
from gpu4pyscf.scf.int4c2e import (BasisProdCache, _make_s_index_offsets,
libgvhf, libgint)
from gpu4pyscf.lib.cupy_helper import (
block_c2s_diag, cart2sph, block_diag, contract, load_library, get_avail_mem, print_mem_info, take_last2d)
block_c2s_diag, cart2sph, block_diag, contract, load_library, get_avail_mem,
print_mem_info, take_last2d, libcupy_helper)
from gpu4pyscf.lib import logger
from gpu4pyscf.gto.mole import basis_seg_contraction

Expand All @@ -32,10 +34,6 @@
BLKSIZE = 128
NROOT_ON_GPU = 7

libgvhf = load_library('libgvhf')
libgint = load_library('libgint')
libcupy_helper = load_library('libcupy_helper')

def make_fake_mol():
'''
fake mol for pairing with auxiliary basis
Expand Down
1 change: 0 additions & 1 deletion gpu4pyscf/dft/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,3 @@ def KS(mol, xc='LDA,VWN'):
return RKS(mol, xc)
else:
return UKS(mol, xc)

4 changes: 1 addition & 3 deletions gpu4pyscf/dft/libxc.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,10 @@
libxc_path = os.path.abspath(os.path.join(path, 'gpu4pyscf', 'lib', 'deps', 'lib'))
try:
_libxc = np.ctypeslib.load_library('libxc', libxc_path)
break
except Exception:
_libxc = None

if _libxc is not None:
break

libgdft = load_library('libgdft')
libgdft.GDFT_xc_lda.argtypes = (
ctypes.c_void_p,
Expand Down
2 changes: 1 addition & 1 deletion gpu4pyscf/dft/xc_deriv.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,4 @@ def transform_kxc(rho, fxc, kxc, xctype, spin=0):
vp[4,0,4] = frtt
vp[4,4,0] = frtt
vp[4,4,4] = fttt
return vp
return vp
Loading

0 comments on commit ed993be

Please sign in to comment.