Improve dft (#66)

* save * implement SMD solvent model * updated readme * fixed issues after v0.6.9 * format examples * output = /dev/null in test_smd.py * evaluate sparse ao directly
pyscf · Dec 15, 2023 · e2bad58 · e2bad58
1 parent cc54c0e
commit e2bad58
Show file tree

Hide file tree

Showing 9 changed files with 411 additions and 229 deletions.
diff --git a/examples/dft_driver.py b/examples/dft_driver.py
@@ -35,7 +35,7 @@
     max_memory=32000)
 # set verbose >= 6 for debugging timer
 
-mol.verbose = 0
+mol.verbose = 6
 
 mf_df = rks.RKS(mol, xc=args.xc).density_fit(auxbasis=args.auxbasis)
 mf_df.verbose = 6
@@ -47,12 +47,15 @@
     mf_df.with_solvent.eps = 78.3553
 
 mf_df.grids.atom_grid = (99,590)
+if mf_df._numint.libxc.is_nlc(mf_df.xc):
+    mf_df.nlcgrids.atom_grid = (50,194)
 mf_df.direct_scf_tol = 1e-14
 mf_df.direct_scf = 1e-14
 mf_df.conv_tol = 1e-12
 e_tot = mf_df.kernel()
 scf_time = time.time() - start_time
 print(f'compute time for energy: {scf_time:.3f} s')
+exit()
 
 start_time = time.time()
 g = mf_df.nuc_grad_method()

diff --git a/gpu4pyscf/__config__.py b/gpu4pyscf/__config__.py
@@ -5,8 +5,8 @@
 # such as A100-80G
 if props['totalGlobalMem'] >= 64 * GB:
     min_ao_blksize = 256
-    min_grid_blksize = 256*256
-    ao_aligned = 64
+    min_grid_blksize = 128*128
+    ao_aligned = 32
     grid_aligned = 128
     mem_fraction = 0.9
     number_of_threads = 2048 * 108

diff --git a/gpu4pyscf/df/int3c2e.py b/gpu4pyscf/df/int3c2e.py
@@ -215,6 +215,7 @@ def build(self, cutoff=1e-14, group_size=None,
             l_ctr_offsets, l_ctr_offsets, q_cond,
             diag_block_with_triu=diag_block_with_triu, aosym=aosym)
         self.log_qs = log_qs.copy()
+        cput1 = logger.timer_debug1(sorted_mol, 'Get pairing', *cput1)
 
         # contraction coefficient for ao basis
         cart_ao_loc = self.sorted_mol.ao_loc_nr(cart=True)
@@ -239,6 +240,7 @@ def build(self, cutoff=1e-14, group_size=None,
         inv_idx = np.argsort(self.sph_ao_idx, kind='stable').astype(np.int32)
         self.rev_ao_idx = inv_idx
         self.coeff = self.cart2sph[:, inv_idx]
+        cput1 = logger.timer_debug1(sorted_mol, 'AO cart2sph coeff', *cput1)
 
         # pairing auxiliary basis with fake basis set
         fake_l_ctr_offsets = np.append(0, np.cumsum(fake_l_ctr_counts))
@@ -268,6 +270,7 @@ def build(self, cutoff=1e-14, group_size=None,
         inv_idx = np.argsort(self.sph_aux_idx, kind='stable').astype(np.int32)
         self.aux_coeff = self.aux_cart2sph[:, inv_idx]
         aux_l_ctr_offsets += fake_l_ctr_offsets[-1]
+        cput1 = logger.timer_debug1(tot_mol, 'aux cart2sph coeff', *cput1)
 
         ao_loc = self.sorted_mol.ao_loc_nr(cart=False)
         self.ao_pairs_row, self.ao_pairs_col = get_ao_pairs(pair2bra, pair2ket, ao_loc)
@@ -276,6 +279,7 @@ def build(self, cutoff=1e-14, group_size=None,
         self.cderi_row = cderi_row
         self.cderi_col = cderi_col
         self.cderi_diag = cupy.argwhere(cderi_row == cderi_col)[:,0]
+        cput1 = logger.timer_debug1(tot_mol, 'Get AO pairs', *cput1)
 
         aux_pair2bra = []
         aux_pair2ket = []
@@ -1434,28 +1438,43 @@ def get_pairing(p_offsets, q_offsets, q_cond,
     log_qs = []
     pair2bra = []
     pair2ket = []
+
     for p0, p1 in zip(p_offsets[:-1], p_offsets[1:]):
         for q0, q1 in zip(q_offsets[:-1], q_offsets[1:]):
             if aosym and q0 < p0 or not aosym:
                 q_sub = q_cond[p0:p1,q0:q1].ravel()
+                '''
                 idx = q_sub.argsort(axis=None)[::-1]
                 q_sorted = q_sub[idx]
                 mask = q_sorted > cutoff
                 idx = idx[mask]
                 ishs, jshs = np.unravel_index(idx, (p1-p0, q1-q0))
+                print(ishs.shape)
+                '''
+                mask = q_sub > cutoff
+                ishs, jshs = np.indices((p1-p0,q1-q0))
+                ishs = ishs.ravel()[mask]
+                jshs = jshs.ravel()[mask]
                 ishs += p0
                 jshs += q0
                 pair2bra.append(ishs)
                 pair2ket.append(jshs)
-                log_q = np.log(q_sorted[mask])
+                log_q = np.log(q_sub[mask])
                 log_q[log_q > 0] = 0
                 log_qs.append(log_q)
             elif aosym and p0 == q0 and p1 == q1:
                 q_sub = q_cond[p0:p1,p0:p1].ravel()
+                '''
                 idx = q_sub.argsort(axis=None)[::-1]
                 q_sorted = q_sub[idx]
                 ishs, jshs = np.unravel_index(idx, (p1-p0, p1-p0))
                 mask = q_sorted > cutoff
+                '''
+
+                ishs, jshs = np.indices((p1-p0, p1-p0))
+                ishs = ishs.ravel()
+                jshs = jshs.ravel()
+                mask = q_sub > cutoff
                 if not diag_block_with_triu:
                     # Drop the shell pairs in the upper triangle for diagonal blocks
                     mask &= ishs >= jshs
@@ -1469,7 +1488,7 @@ def get_pairing(p_offsets, q_offsets, q_cond,
                 pair2bra.append(ishs)
                 pair2ket.append(jshs)
 
-                log_q = np.log(q_sorted[mask])
+                log_q = np.log(q_sub[mask])
                 log_q[log_q > 0] = 0
                 log_qs.append(log_q)
     return log_qs, pair2bra, pair2ket

diff --git a/gpu4pyscf/dft/gen_grid.py b/gpu4pyscf/dft/gen_grid.py
@@ -41,8 +41,9 @@
 libdft = lib.load_library('libdft')
 libgdft = load_library('libgdft')
 
-from pyscf.dft.gen_grid import GROUP_BOX_SIZE, GROUP_BOUNDARY_PENALTY, NELEC_ERROR_TOL, LEBEDEV_ORDER, LEBEDEV_NGRID
+from pyscf.dft.gen_grid import GROUP_BOUNDARY_PENALTY, NELEC_ERROR_TOL, LEBEDEV_ORDER, LEBEDEV_NGRID
 
+GROUP_BOX_SIZE = 3.0
 ALIGNMENT_UNIT = 32
 # SG0
 # S. Chien and P. Gill,  J. Comput. Chem. 27 (2006) 730-739.
@@ -388,8 +389,12 @@ def arg_group_grids(mol, coords, box_size=GROUP_BOX_SIZE):
     box_ids[box_ids[:,0] > boxes[0], 0] = boxes[0]
     box_ids[box_ids[:,1] > boxes[1], 1] = boxes[1]
     box_ids[box_ids[:,2] > boxes[2], 2] = boxes[2]
-    rev_idx = numpy.unique(box_ids.get(), axis=0, return_inverse=True)[1]
-    return rev_idx.argsort(kind='stable')
+
+    boxes *= 2 # for safety
+    box_id = box_ids[:,0] + box_ids[:,1] * boxes[0] + box_ids[:,2] * boxes[0] * boxes[1]
+    #rev_idx = numpy.unique(box_ids.get(), axis=0, return_inverse=True)[1]
+    rev_idx = cupy.unique(box_id, return_inverse=True)[1]
+    return rev_idx.argsort()
 
 def _load_conf(mod, name, default):
     var = getattr(__config__, name, None)