Skip to content

Commit

Permalink
Improve dft (#66)
Browse files Browse the repository at this point in the history
* save

* implement SMD solvent model

* updated readme

* fixed issues after v0.6.9

* format examples

* output = /dev/null in test_smd.py

* evaluate sparse ao directly
  • Loading branch information
wxj6000 authored Dec 15, 2023
1 parent cc54c0e commit e2bad58
Show file tree
Hide file tree
Showing 9 changed files with 411 additions and 229 deletions.
5 changes: 4 additions & 1 deletion examples/dft_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
max_memory=32000)
# set verbose >= 6 for debugging timer

mol.verbose = 0
mol.verbose = 6

mf_df = rks.RKS(mol, xc=args.xc).density_fit(auxbasis=args.auxbasis)
mf_df.verbose = 6
Expand All @@ -47,12 +47,15 @@
mf_df.with_solvent.eps = 78.3553

mf_df.grids.atom_grid = (99,590)
if mf_df._numint.libxc.is_nlc(mf_df.xc):
mf_df.nlcgrids.atom_grid = (50,194)
mf_df.direct_scf_tol = 1e-14
mf_df.direct_scf = 1e-14
mf_df.conv_tol = 1e-12
e_tot = mf_df.kernel()
scf_time = time.time() - start_time
print(f'compute time for energy: {scf_time:.3f} s')
exit()

start_time = time.time()
g = mf_df.nuc_grad_method()
Expand Down
4 changes: 2 additions & 2 deletions gpu4pyscf/__config__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
# such as A100-80G
if props['totalGlobalMem'] >= 64 * GB:
min_ao_blksize = 256
min_grid_blksize = 256*256
ao_aligned = 64
min_grid_blksize = 128*128
ao_aligned = 32
grid_aligned = 128
mem_fraction = 0.9
number_of_threads = 2048 * 108
Expand Down
23 changes: 21 additions & 2 deletions gpu4pyscf/df/int3c2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def build(self, cutoff=1e-14, group_size=None,
l_ctr_offsets, l_ctr_offsets, q_cond,
diag_block_with_triu=diag_block_with_triu, aosym=aosym)
self.log_qs = log_qs.copy()
cput1 = logger.timer_debug1(sorted_mol, 'Get pairing', *cput1)

# contraction coefficient for ao basis
cart_ao_loc = self.sorted_mol.ao_loc_nr(cart=True)
Expand All @@ -239,6 +240,7 @@ def build(self, cutoff=1e-14, group_size=None,
inv_idx = np.argsort(self.sph_ao_idx, kind='stable').astype(np.int32)
self.rev_ao_idx = inv_idx
self.coeff = self.cart2sph[:, inv_idx]
cput1 = logger.timer_debug1(sorted_mol, 'AO cart2sph coeff', *cput1)

# pairing auxiliary basis with fake basis set
fake_l_ctr_offsets = np.append(0, np.cumsum(fake_l_ctr_counts))
Expand Down Expand Up @@ -268,6 +270,7 @@ def build(self, cutoff=1e-14, group_size=None,
inv_idx = np.argsort(self.sph_aux_idx, kind='stable').astype(np.int32)
self.aux_coeff = self.aux_cart2sph[:, inv_idx]
aux_l_ctr_offsets += fake_l_ctr_offsets[-1]
cput1 = logger.timer_debug1(tot_mol, 'aux cart2sph coeff', *cput1)

ao_loc = self.sorted_mol.ao_loc_nr(cart=False)
self.ao_pairs_row, self.ao_pairs_col = get_ao_pairs(pair2bra, pair2ket, ao_loc)
Expand All @@ -276,6 +279,7 @@ def build(self, cutoff=1e-14, group_size=None,
self.cderi_row = cderi_row
self.cderi_col = cderi_col
self.cderi_diag = cupy.argwhere(cderi_row == cderi_col)[:,0]
cput1 = logger.timer_debug1(tot_mol, 'Get AO pairs', *cput1)

aux_pair2bra = []
aux_pair2ket = []
Expand Down Expand Up @@ -1434,28 +1438,43 @@ def get_pairing(p_offsets, q_offsets, q_cond,
log_qs = []
pair2bra = []
pair2ket = []

for p0, p1 in zip(p_offsets[:-1], p_offsets[1:]):
for q0, q1 in zip(q_offsets[:-1], q_offsets[1:]):
if aosym and q0 < p0 or not aosym:
q_sub = q_cond[p0:p1,q0:q1].ravel()
'''
idx = q_sub.argsort(axis=None)[::-1]
q_sorted = q_sub[idx]
mask = q_sorted > cutoff
idx = idx[mask]
ishs, jshs = np.unravel_index(idx, (p1-p0, q1-q0))
print(ishs.shape)
'''
mask = q_sub > cutoff
ishs, jshs = np.indices((p1-p0,q1-q0))
ishs = ishs.ravel()[mask]
jshs = jshs.ravel()[mask]
ishs += p0
jshs += q0
pair2bra.append(ishs)
pair2ket.append(jshs)
log_q = np.log(q_sorted[mask])
log_q = np.log(q_sub[mask])
log_q[log_q > 0] = 0
log_qs.append(log_q)
elif aosym and p0 == q0 and p1 == q1:
q_sub = q_cond[p0:p1,p0:p1].ravel()
'''
idx = q_sub.argsort(axis=None)[::-1]
q_sorted = q_sub[idx]
ishs, jshs = np.unravel_index(idx, (p1-p0, p1-p0))
mask = q_sorted > cutoff
'''

ishs, jshs = np.indices((p1-p0, p1-p0))
ishs = ishs.ravel()
jshs = jshs.ravel()
mask = q_sub > cutoff
if not diag_block_with_triu:
# Drop the shell pairs in the upper triangle for diagonal blocks
mask &= ishs >= jshs
Expand All @@ -1469,7 +1488,7 @@ def get_pairing(p_offsets, q_offsets, q_cond,
pair2bra.append(ishs)
pair2ket.append(jshs)

log_q = np.log(q_sorted[mask])
log_q = np.log(q_sub[mask])
log_q[log_q > 0] = 0
log_qs.append(log_q)
return log_qs, pair2bra, pair2ket
Expand Down
11 changes: 8 additions & 3 deletions gpu4pyscf/dft/gen_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@
libdft = lib.load_library('libdft')
libgdft = load_library('libgdft')

from pyscf.dft.gen_grid import GROUP_BOX_SIZE, GROUP_BOUNDARY_PENALTY, NELEC_ERROR_TOL, LEBEDEV_ORDER, LEBEDEV_NGRID
from pyscf.dft.gen_grid import GROUP_BOUNDARY_PENALTY, NELEC_ERROR_TOL, LEBEDEV_ORDER, LEBEDEV_NGRID

GROUP_BOX_SIZE = 3.0
ALIGNMENT_UNIT = 32
# SG0
# S. Chien and P. Gill, J. Comput. Chem. 27 (2006) 730-739.
Expand Down Expand Up @@ -388,8 +389,12 @@ def arg_group_grids(mol, coords, box_size=GROUP_BOX_SIZE):
box_ids[box_ids[:,0] > boxes[0], 0] = boxes[0]
box_ids[box_ids[:,1] > boxes[1], 1] = boxes[1]
box_ids[box_ids[:,2] > boxes[2], 2] = boxes[2]
rev_idx = numpy.unique(box_ids.get(), axis=0, return_inverse=True)[1]
return rev_idx.argsort(kind='stable')

boxes *= 2 # for safety
box_id = box_ids[:,0] + box_ids[:,1] * boxes[0] + box_ids[:,2] * boxes[0] * boxes[1]
#rev_idx = numpy.unique(box_ids.get(), axis=0, return_inverse=True)[1]
rev_idx = cupy.unique(box_id, return_inverse=True)[1]
return rev_idx.argsort()

def _load_conf(mod, name, default):
var = getattr(__config__, name, None)
Expand Down
Loading

0 comments on commit e2bad58

Please sign in to comment.