Skip to content

Commit

Permalink
More bound checks in numint (#309)
Browse files Browse the repository at this point in the history
* be compatible with pyscf 2.8

* remove an example

* check convergence

* max_memory = 32000

* more bound checks for grids

* remove test_gil

* remove log

* unit test
  • Loading branch information
wxj6000 authored Jan 20, 2025
1 parent 0add455 commit 86ca248
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 31 deletions.
35 changes: 21 additions & 14 deletions gpu4pyscf/dft/numint.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,18 @@ def _vv10nlc(rho, coords, vvrho, vvweight, vvcoords, nlc_pars):
vxc[1,threshind] = 1.5*W*dW0dG
return exc,vxc

def gen_grid_range(ngrids, device_id, blksize=MIN_BLK_SIZE):
    '''
    Return the (start, end) range of grid indices assigned to the given device.

    The ngrids points are divided across _num_devices devices using ceiling
    division, and the per-device share is then rounded up to a whole multiple
    of blksize. Both bounds are clamped to ngrids, so the last device may get
    a shorter range and a device whose share lies entirely past the end gets
    an empty range (start == end == ngrids).

    Args:
        ngrids: total number of grid points.
        device_id: index of the device whose range is requested.
        blksize: block size that the per-device share is aligned to.

    Returns:
        Tuple (grid_start, grid_end); the range is half-open, so the number
        of local grid points is grid_end - grid_start.
    '''
    # Ceiling division: the smallest share such that all devices together
    # cover every grid point.
    share = (ngrids + (_num_devices - 1)) // _num_devices

    # Round the share up to a whole number of blocks.
    nblocks = (share + (blksize - 1)) // blksize
    stride = nblocks * blksize

    # Clamp both ends so no range ever extends past the last grid point.
    lower, upper = (min(k * stride, ngrids)
                    for k in (device_id, device_id + 1))
    return lower, upper

def _nr_rks_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
verbose=None, with_lapl=False, grid_range=(), device_id=0, hermi=1):
verbose=None, with_lapl=False, device_id=0, hermi=1):
''' nr_rks task on given device
'''
with cupy.cuda.Device(device_id), _streams[device_id]:
Expand All @@ -413,12 +423,9 @@ def _nr_rks_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
ao_deriv = 1

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
ngrids_per_device = (ngrids_per_device + MIN_BLK_SIZE - 1) // MIN_BLK_SIZE * MIN_BLK_SIZE
grid_start = min(device_id * ngrids_per_device, ngrids_glob)
grid_end = min((device_id + 1) * ngrids_per_device, ngrids_glob)
grid_start, grid_end = gen_grid_range(ngrids_glob, device_id)
ngrids_local = grid_end - grid_start
log.debug(f"{ngrids_local} on Device {device_id}")
log.debug(f"{ngrids_local} grids on Device {device_id}")

weights = cupy.empty([ngrids_local])
if xctype == 'LDA':
Expand All @@ -439,12 +446,12 @@ def _nr_rks_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
if mo_coeff is None:
dms_mask = dms[i][idx[:,None],idx]
rho_tot[i,:,p0:p1] = eval_rho(_sorted_mol, ao_mask, dms_mask,
xctype=xctype, hermi=hermi, with_lapl=with_lapl)
xctype=xctype, hermi=hermi, with_lapl=with_lapl)
else:
assert hermi == 1
mo_coeff_mask = mo_coeff[idx,:]
rho_tot[i,:,p0:p1] = eval_rho2(_sorted_mol, ao_mask, mo_coeff_mask, mo_occ,
None, xctype, with_lapl)
None, xctype, with_lapl)
p0 = p1
t0 = log.timer_debug1(f'eval rho on Device {device_id}', *t0)

Expand Down Expand Up @@ -787,7 +794,7 @@ def nr_rks_group(ni, mol, grids, xc_code, dms, relativity=0, hermi=1,
return nelec, excsum, vmat

def _nr_uks_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
verbose=None, with_lapl=False, grid_range=(), device_id=0, hermi=1):
verbose=None, with_lapl=False, device_id=0, hermi=1):
''' nr_uks task on one device
'''
with cupy.cuda.Device(device_id), _streams[device_id]:
Expand Down Expand Up @@ -817,12 +824,9 @@ def _nr_uks_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
ao_deriv = 1

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
ngrids_per_device = (ngrids_per_device + MIN_BLK_SIZE - 1) // MIN_BLK_SIZE * MIN_BLK_SIZE
grid_start = min(device_id * ngrids_per_device, ngrids_glob)
grid_end = min((device_id + 1) * ngrids_per_device, ngrids_glob)
grid_start, grid_end = gen_grid_range(ngrids_glob, device_id)
ngrids_local = grid_end - grid_start
log.debug(f"{ngrids_local} on Device {device_id}")
log.debug(f"{ngrids_local} grids on Device {device_id}")

for ao_mask, idx, weight, _ in ni.block_loop(_sorted_mol, grids, nao, ao_deriv,
max_memory=None,
Expand Down Expand Up @@ -1674,6 +1678,9 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
ni.non0ao_idx[lookup_key] = _sparse_index(_sorted_mol, coords, opt.l_ctr_offsets)

pad, idx, non0shl_idx, ctr_offsets_slice, ao_loc_slice = ni.non0ao_idx[lookup_key]
if len(idx) == 0:
continue

ao_mask = eval_ao(
_sorted_mol, coords, deriv,
nao_slice=len(idx),
Expand Down
9 changes: 5 additions & 4 deletions gpu4pyscf/grad/rks.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def get_veff(ks_grad, mol=None, dm=None, verbose=None):
return tag_array(exc1_per_atom, exc1_grid=exc)

def _get_vxc_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
verbose=None, with_lapl=False, grid_range=(), device_id=0):
verbose=None, with_lapl=False, device_id=0):
''' Calculate the gradient of vxc on given device
'''
with cupy.cuda.Device(device_id), _streams[device_id]:
Expand All @@ -151,10 +151,11 @@ def _get_vxc_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
opt = ni.gdftopt
_sorted_mol = opt._sorted_mol
nset = dms.shape[0]

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
grid_start = device_id * ngrids_per_device
grid_end = (device_id + 1) * ngrids_per_device
grid_start, grid_end = numint.gen_grid_range(ngrids_glob, device_id)
ngrids_local = grid_end - grid_start
log.debug(f"{ngrids_local} grids on Device {device_id}")

nset = len(dms)
assert nset == 1
Expand Down
7 changes: 4 additions & 3 deletions gpu4pyscf/grad/uks.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,11 @@ def _get_vxc_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ,
opt = ni.gdftopt
_sorted_mol = opt._sorted_mol
nset = dms.shape[0]

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
grid_start = device_id * ngrids_per_device
grid_end = (device_id + 1) * ngrids_per_device
grid_start, grid_end = numint.gen_grid_range(ngrids_glob, device_id)
ngrids_local = grid_end - grid_start
log.debug(f"{ngrids_local} grids on Device {device_id}")

vmat = cupy.zeros((nset,3,nao,nao))
if xctype == 'LDA':
Expand Down
16 changes: 6 additions & 10 deletions gpu4pyscf/hessian/rks.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,7 @@ def _get_vxc_deriv2_task(hessobj, grids, mo_coeff, mo_occ, max_memory, device_id
ao_loc = mol.ao_loc_nr()

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
grid_start = device_id * ngrids_per_device
grid_end = (device_id + 1) * ngrids_per_device
grid_start, grid_end = numint.gen_grid_range(ngrids_glob, device_id)

with cupy.cuda.Device(device_id), _streams[device_id]:
log = logger.new_logger(mol, verbose)
Expand Down Expand Up @@ -551,10 +549,8 @@ def _get_vxc_deriv1_task(hessobj, grids, mo_coeff, mo_occ, max_memory, device_id
ao_loc = mol.ao_loc_nr()

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
grid_start = device_id * ngrids_per_device
grid_end = (device_id + 1) * ngrids_per_device

grid_start, grid_end = numint.gen_grid_range(ngrids_glob, device_id)

with cupy.cuda.Device(device_id), _streams[device_id]:
mo_occ = cupy.asarray(mo_occ)
mo_coeff = cupy.asarray(mo_coeff)
Expand Down Expand Up @@ -727,9 +723,9 @@ def _nr_rks_fxc_mo_task(ni, mol, grids, xc_code, fxc, mo_coeff, mo1, mocc,
ao_deriv = 1

ngrids_glob = grids.coords.shape[0]
ngrids_per_device = (ngrids_glob + _num_devices - 1) // _num_devices
grid_start = device_id * ngrids_per_device
grid_end = (device_id + 1) * ngrids_per_device
grid_start, grid_end = numint.gen_grid_range(ngrids_glob, device_id)
ngrids_local = grid_end - grid_start
log.debug(f"{ngrids_local} grids on Device {device_id}")

p0 = p1 = grid_start
t1 = t0 = log.init_timer()
Expand Down

0 comments on commit 86ca248

Please sign in to comment.