Skip to content

Commit

Permalink
Remove natm * 3 * nao * nao memory bottleneck, now the bottleneck is …
Browse files Browse the repository at this point in the history
…natm * 3 * nmo * nocc. Also bug fix here and there
  • Loading branch information
henryw7 committed Jan 7, 2025
1 parent a2657cc commit ad0e46b
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 42 deletions.
2 changes: 1 addition & 1 deletion gpu4pyscf/gto/int3c1e_ip.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ def get_int3c1e_ip1_density_contracted(mol, grids, charge_exponents, dm, intopt)
dm = intopt.sort_orbitals(dm, [0,1])
if not mol.cart:
cart2sph_transformation_matrix = intopt.cart2sph
print(intopt.cart2sph.shape)
# TODO: This part is inefficient (O(N^3)), should be changed to the O(N^2) algorithm
dm = cart2sph_transformation_matrix @ dm @ cart2sph_transformation_matrix.T
dm = dm.flatten(order='F') # Column major order matches (i + j * n_ao) access pattern in the C function
Expand Down Expand Up @@ -487,6 +486,7 @@ def int1e_grids_ip1(mol, grids, charge_exponents=None, dm=None, charges=None, di
else:
return get_int3c1e_ip1_charge_contracted(mol, grids, charge_exponents, charges, intopt)
else:
assert dm is not None
return get_int3c1e_ip1_density_contracted(mol, grids, charge_exponents, dm, intopt)

def int1e_grids_ip2(mol, grids, charge_exponents=None, dm=None, charges=None, direct_scf_tol=1e-13, intopt=None):
Expand Down
2 changes: 1 addition & 1 deletion gpu4pyscf/qmmm/pbc/itrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ def calculate_h1e(self, h1_gpu):
nao = mol.nao
if mm_mol.charge_model == 'gaussian' and len(coords) != 0:
expnts = cp.hstack([mm_mol.get_zetas()] * len(Ls))[mask]
g_qm += int1e_grids_ip1(mol, coords, charges = charges, charge_exponents = expnts).transpose(0,2,1)
g_qm += int1e_grids_ip1(mol, coords, charges = charges, charge_exponents = expnts)
elif mm_mol.charge_model == 'point' and len(coords) != 0:
raise RuntimeError("Not tested yet")
max_memory = self.max_memory - lib.current_memory()[0]
Expand Down
81 changes: 41 additions & 40 deletions gpu4pyscf/solvent/hessian/pcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from gpu4pyscf.lib import logger
from gpu4pyscf.hessian.jk import _ao2mo
from gpu4pyscf.gto.int3c1e_ip import int1e_grids_ip1, int1e_grids_ip2
from gpu4pyscf.gto import int3c1e
from gpu4pyscf.gto.int3c1e import int1e_grids

def hess_nuc(pcmobj):
Expand Down Expand Up @@ -210,6 +211,7 @@ def analytic_grad_vmat(pcmobj, dm, mo_coeff, mo_occ, atmlst=None, verbose=None):

nao, nmo = mo_coeff.shape
mocc = mo_coeff[:,mo_occ>0]
nocc = mocc.shape[1]

gridslice = pcmobj.surface['gslice_by_atom']
charge_exp = pcmobj.surface['charge_exp']
Expand All @@ -225,22 +227,30 @@ def analytic_grad_vmat(pcmobj, dm, mo_coeff, mo_occ, atmlst=None, verbose=None):

ngrids = q_sym.shape[0]

dIdx = cupy.zeros([len(atmlst), 3, nao, nao])
intopt_fock = int3c1e.VHFOpt(mol)
intopt_fock.build(cutoff = 1e-14, aosym = True)
intopt_derivative = int3c1e.VHFOpt(mol)
intopt_derivative.build(cutoff = 1e-14, aosym = False)

dIdx_mo = cupy.zeros([len(atmlst), 3, nmo, nocc])

dIdA = int1e_grids_ip1(mol, grid_coords, direct_scf_tol = 1e-14, charges = q_sym, charge_exponents = charge_exp**2)
dIdA = int1e_grids_ip1(mol, grid_coords, charges = q_sym, intopt = intopt_derivative, charge_exponents = charge_exp**2)
aoslice = mol.aoslice_by_atom()
aoslice = numpy.array(aoslice)
for i_atom in atmlst:
p0,p1 = aoslice[i_atom, 2:]
dIdx[i_atom, :, p0:p1, :] += dIdA[:, p0:p1, :]
dIdx[i_atom, :, :, p0:p1] += dIdA[:, p0:p1, :].transpose(0,2,1)
# dIdx[i_atom, :, p0:p1, :] += dIdA[:, p0:p1, :]
# dIdx[i_atom, :, :, p0:p1] += dIdA[:, p0:p1, :].transpose(0,2,1)
dIdx_mo[i_atom, :, :, :] += cupy.einsum('ip,dpq,qj->dij', mo_coeff[p0:p1, :].T, dIdA[:, p0:p1, :], mocc)
dIdx_mo[i_atom, :, :, :] += cupy.einsum('ip,dpq,qj->dij', mo_coeff.T, dIdA[:, p0:p1, :].transpose(0,2,1), mocc[p0:p1, :])

for i_atom in atmlst:
g0,g1 = gridslice[i_atom]
dIdx[i_atom, :, :, :] += int1e_grids_ip2(mol, grid_coords[g0:g1,:], charges = q_sym[g0:g1],
direct_scf_tol = 1e-14, charge_exponents = charge_exp[g0:g1]**2)
dIdC = int1e_grids_ip2(mol, grid_coords[g0:g1,:], charges = q_sym[g0:g1],
intopt = intopt_derivative, charge_exponents = charge_exp[g0:g1]**2)
dIdx_mo[i_atom, :, :, :] += cupy.einsum('ip,dpq,qj->dij', mo_coeff.T, dIdC, mocc)

dV_on_molecule_dx = dIdx
dV_on_molecule_dx_mo = dIdx_mo

inverse_K = cupy.linalg.inv(K)
def append_dS_dot_q(dS, dSii, q, output, atmlst, gridslice):
Expand Down Expand Up @@ -273,12 +283,7 @@ def append_dDT_dot_q(dD, q, output, atmlst, gridslice):
dSdx_dot_q = cupy.zeros((len(atmlst), 3, ngrids))
append_dS_dot_q(dS, dSii, q_sym, dSdx_dot_q, atmlst, gridslice)

dqdx = cupy.einsum('ij,Adj->Adi', inverse_K, dSdx_dot_q)

for i_atom in atmlst:
for i_xyz in range(3):
dV_on_molecule_dx[i_atom, i_xyz, :, :] += int1e_grids(mol, grid_coords, charges = dqdx[i_atom, i_xyz, :],
direct_scf_tol = 1e-14, charge_exponents = charge_exp**2)
dqdx_fix_Vq = cupy.einsum('ij,Adj->Adi', inverse_K, dSdx_dot_q)

elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SMD']:
dF, dA = get_dF_dA(pcmobj.surface)
Expand Down Expand Up @@ -307,7 +312,7 @@ def append_dDT_dot_q(dD, q, output, atmlst, gridslice):
append_dD_dot_q(dD, AS @ q, dDdx_dot_ASq, atmlst, gridslice)
dKdx_dot_q -= f_eps_over_2pi * dDdx_dot_ASq

dqdx = -cupy.einsum('ij,Adj->Adi', inverse_K, dKdx_dot_q)
dqdx_fix_Vq = -cupy.einsum('ij,Adj->Adi', inverse_K, dKdx_dot_q)

dAdx_dot_V = cupy.zeros((len(atmlst), 3, ngrids))
append_dA_dot_q(dA, v_grids, dAdx_dot_V, atmlst, gridslice)
Expand All @@ -316,7 +321,7 @@ def append_dDT_dot_q(dD, q, output, atmlst, gridslice):
append_dD_dot_q(dD, A * v_grids, dDdx_dot_AV, atmlst, gridslice)

dRdx_dot_V = f_eps_over_2pi * (dDdx_dot_AV + cupy.einsum('ij,Adj->Adi', D, dAdx_dot_V))
dqdx += cupy.einsum('ij,Adj->Adi', inverse_K, dRdx_dot_V)
dqdx_fix_Vq += cupy.einsum('ij,Adj->Adi', inverse_K, dRdx_dot_V)

invKT_V = inverse_K.T @ v_grids
dDdxT_dot_invKT_V = cupy.zeros((len(atmlst), 3, ngrids))
Expand All @@ -325,7 +330,7 @@ def append_dDT_dot_q(dD, q, output, atmlst, gridslice):
DT_invKT_V = D.T @ invKT_V
dAdxT_dot_DT_invKT_V = cupy.zeros((len(atmlst), 3, ngrids))
append_dA_dot_q(dA, DT_invKT_V, dAdxT_dot_DT_invKT_V, atmlst, gridslice)
dqdx += f_eps_over_2pi * (cupy.einsum('i,Adi->Adi', A, dDdxT_dot_invKT_V) + dAdxT_dot_DT_invKT_V)
dqdx_fix_Vq += f_eps_over_2pi * (cupy.einsum('i,Adi->Adi', A, dDdxT_dot_invKT_V) + dAdxT_dot_DT_invKT_V)

dSdxT_dot_invKT_V = cupy.zeros((len(atmlst), 3, ngrids))
append_dST_dot_q(dS, dSii, invKT_V, dSdxT_dot_invKT_V, atmlst, gridslice)
Expand All @@ -338,14 +343,9 @@ def append_dDT_dot_q(dD, q, output, atmlst, gridslice):
append_dST_dot_q(dS, dSii, DA.T @ invKT_V, dSdxT_dot_AT_DT_invKT_V, atmlst, gridslice)
dKdxT_dot_invKT_V -= f_eps_over_2pi * dSdxT_dot_AT_DT_invKT_V

dqdx += -cupy.einsum('ij,Adj->Adi', R.T @ inverse_K.T, dKdxT_dot_invKT_V)

dqdx *= -0.5
dqdx_fix_Vq += -cupy.einsum('ij,Adj->Adi', R.T @ inverse_K.T, dKdxT_dot_invKT_V)

for i_atom in atmlst:
for i_xyz in range(3):
dV_on_molecule_dx[i_atom, i_xyz, :, :] += int1e_grids(mol, grid_coords, charges = dqdx[i_atom, i_xyz, :],
direct_scf_tol = 1e-14, charge_exponents = charge_exp**2)
dqdx_fix_Vq *= -0.5

elif pcmobj.method.upper() in ['SS(V)PE']:
dF, dA = get_dF_dA(pcmobj.surface)
Expand Down Expand Up @@ -390,7 +390,7 @@ def dK_dot_q(q):

q = inverse_K @ R @ v_grids
dKdx_dot_q = dK_dot_q(q)
dqdx = -cupy.einsum('ij,Adj->Adi', inverse_K, dKdx_dot_q)
dqdx_fix_Vq = -cupy.einsum('ij,Adj->Adi', inverse_K, dKdx_dot_q)

dAdx_dot_V = cupy.zeros((len(atmlst), 3, ngrids))
append_dA_dot_q(dA, v_grids, dAdx_dot_V, atmlst, gridslice)
Expand All @@ -399,7 +399,7 @@ def dK_dot_q(q):
append_dD_dot_q(dD, A * v_grids, dDdx_dot_AV, atmlst, gridslice)

dRdx_dot_V = f_eps_over_2pi * (dDdx_dot_AV + cupy.einsum('ij,Adj->Adi', D, dAdx_dot_V))
dqdx += cupy.einsum('ij,Adj->Adi', inverse_K, dRdx_dot_V)
dqdx_fix_Vq += cupy.einsum('ij,Adj->Adi', inverse_K, dRdx_dot_V)

invKT_V = inverse_K.T @ v_grids
dDdxT_dot_invKT_V = cupy.zeros((len(atmlst), 3, ngrids))
Expand All @@ -408,20 +408,22 @@ def dK_dot_q(q):
DT_invKT_V = D.T @ invKT_V
dAdxT_dot_DT_invKT_V = cupy.zeros((len(atmlst), 3, ngrids))
append_dA_dot_q(dA, DT_invKT_V, dAdxT_dot_DT_invKT_V, atmlst, gridslice)
dqdx += f_eps_over_2pi * (cupy.einsum('i,Adi->Adi', A, dDdxT_dot_invKT_V) + dAdxT_dot_DT_invKT_V)
dqdx_fix_Vq += f_eps_over_2pi * (cupy.einsum('i,Adi->Adi', A, dDdxT_dot_invKT_V) + dAdxT_dot_DT_invKT_V)

dKdx_dot_invKT_V = dK_dot_q(invKT_V)
dqdx += -cupy.einsum('ij,Adj->Adi', R.T @ inverse_K.T, dKdx_dot_invKT_V)
dqdx_fix_Vq += -cupy.einsum('ij,Adj->Adi', R.T @ inverse_K.T, dKdx_dot_invKT_V)

dqdx *= -0.5
dqdx_fix_Vq *= -0.5

for i_atom in atmlst:
for i_xyz in range(3):
dV_on_molecule_dx[i_atom, i_xyz, :, :] += int1e_grids(mol, grid_coords, charges = dqdx[i_atom, i_xyz, :],
direct_scf_tol = 1e-14, charge_exponents = charge_exp**2)
else:
raise RuntimeError(f"Unknown implicit solvent model: {pcmobj.method}")

for i_atom in atmlst:
for i_xyz in range(3):
dIdx_from_dqdx = int1e_grids(mol, grid_coords, charges = dqdx_fix_Vq[i_atom, i_xyz, :],
intopt = intopt_fock, charge_exponents = charge_exp**2)
dV_on_molecule_dx_mo[i_atom, i_xyz, :, :] += cupy.einsum('ip,pq,qj->ij', mo_coeff.T, dIdx_from_dqdx, mocc)

atom_coords = mol.atom_coords(unit='B')
atom_charges = numpy.asarray(mol.atom_charges(), dtype=numpy.float64)
atom_coords = atom_coords[atmlst]
Expand All @@ -439,21 +441,20 @@ def dK_dot_q(q):
g0,g1 = gridslice[i_atom]
dV_on_charge_dx[i_atom,:,g0:g1] += cupy.einsum('dqA,A->dq', v_ng_ip2[:,g0:g1,:], atom_charges)

dIdA = int1e_grids_ip1(mol, grid_coords, dm = dm + dm.T, direct_scf_tol = 1e-14, charge_exponents = charge_exp**2)
dIdA = int1e_grids_ip1(mol, grid_coords, dm = dm + dm.T, intopt = intopt_derivative, charge_exponents = charge_exp**2)
dV_on_charge_dx[atmlst,:,:] -= dIdA[atmlst,:,:]

dIdC = int1e_grids_ip2(mol, grid_coords, direct_scf_tol = 1e-14, dm = dm, charge_exponents = charge_exp**2)
dIdC = int1e_grids_ip2(mol, grid_coords, intopt = intopt_derivative, dm = dm, charge_exponents = charge_exp**2)
for i_atom in atmlst:
g0,g1 = gridslice[i_atom]
dV_on_charge_dx[i_atom,:,g0:g1] -= dIdC[:,g0:g1]

for i_atom in atmlst:
for i_xyz in range(3):
invK_R_dVdx = 0.5 * (inverse_K @ R + R.T @ inverse_K.T) @ dV_on_charge_dx[i_atom, i_xyz, :]
dV_on_molecule_dx[i_atom, i_xyz, :, :] += int1e_grids(mol, grid_coords, charges = invK_R_dVdx,
direct_scf_tol = 1e-14, charge_exponents = charge_exp**2)

dV_on_molecule_dx_mo = cupy.einsum('ip,Adpq,qj->Adij', mo_coeff.T, dV_on_molecule_dx, mocc)
dIdx_from_dVdx = int1e_grids(mol, grid_coords, charges = invK_R_dVdx,
intopt = intopt_fock, charge_exponents = charge_exp**2)
dV_on_molecule_dx_mo[i_atom, i_xyz, :, :] += cupy.einsum('ip,pq,qj->ij', mo_coeff.T, dIdx_from_dVdx, mocc)

t1 = log.timer_debug1('computing solvent grad veff', *t1)
return dV_on_molecule_dx_mo
Expand Down Expand Up @@ -520,8 +521,8 @@ def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
solvent = self.base.with_solvent
dm = self.base.make_rdm1(ao_repr=True)
dm = dm[0] + dm[1]
dva = fd_grad_vmat(solvent, dm, mo_coeff[0], mo_occ[0], atmlst=atmlst, verbose=verbose)
dvb = fd_grad_vmat(solvent, dm, mo_coeff[1], mo_occ[1], atmlst=atmlst, verbose=verbose)
dva = analytic_grad_vmat(solvent, dm, mo_coeff[0], mo_occ[0], atmlst=atmlst, verbose=verbose)
dvb = analytic_grad_vmat(solvent, dm, mo_coeff[1], mo_occ[1], atmlst=atmlst, verbose=verbose)
for i0, ia in enumerate(atmlst):
h1aoa[i0] += dva[i0]
h1aob[i0] += dvb[i0]
Expand Down

0 comments on commit ad0e46b

Please sign in to comment.