From 37b77b03fa154f935f70b40dcb238168f138b633 Mon Sep 17 00:00:00 2001 From: Yuichi Motoyama Date: Mon, 21 Aug 2023 21:51:24 +0900 Subject: [PATCH 1/3] postprocess --- .../default_observer.py | 29 +- .../latgas_abinitio_interface/model_setup.py | 3 +- abics/sampling/pamc.py | 214 +++++---- abics/sampling/rxmc.py | 247 +++++----- abics/sampling/simple_parallel.py | 67 +++ abics/scripts/main_dft_latgas.py | 82 ++-- abics/scripts/postproc.py | 61 +++ abics/scripts/postproc_dft_latgas.py | 435 ++++++++++++++++++ docs/sphinx/en/source/how_to_use/index.rst | 7 + docs/sphinx/ja/source/how_to_use/index.rst | 7 +- pyproject.toml | 1 + 11 files changed, 900 insertions(+), 253 deletions(-) create mode 100644 abics/scripts/postproc.py create mode 100644 abics/scripts/postproc_dft_latgas.py diff --git a/abics/applications/latgas_abinitio_interface/default_observer.py b/abics/applications/latgas_abinitio_interface/default_observer.py index bf3aa3bd..573561b0 100644 --- a/abics/applications/latgas_abinitio_interface/default_observer.py +++ b/abics/applications/latgas_abinitio_interface/default_observer.py @@ -81,7 +81,7 @@ class DefaultObserver(ObserverBase): reference_species: list[str] calculators: list - def __init__(self, comm, Lreload=False, params={}): + def __init__(self, comm, Lreload=False, params={}, with_energy=True): """ Parameters @@ -100,7 +100,11 @@ def __init__(self, comm, Lreload=False, params={}): with open(os.path.join(str(myrank), "obs.dat"), "r") as f: self.lprintcount = int(f.readlines()[-1].split()[0]) + 1 - self.names = ["energy_internal", "energy"] + self.with_energy = with_energy + if with_energy: + self.names = ["energy_internal", "energy"] + else: + self.names = [] params_solvers = params.get("solver", []) self.calculators = [] @@ -162,16 +166,17 @@ def logfunc(self, calc_state: MCAlgorithm) -> tuple[float, ...]: assert calc_state.config is not None structure: Structure = calc_state.config.structure_norel - energy_internal = calc_state.model.internal_energy(calc_state.config) - energy = calc_state.model.energy(calc_state.config) - - result = [energy_internal, energy] - - if self.minE is None or energy < self.minE: - self.minE = energy - with open("minEfi.dat", "a") as f: - f.write(str(self.minE) + "\n") - structure.to(fmt="POSCAR", filename="minE.vasp") + if self.with_energy: + energy_internal = calc_state.model.internal_energy(calc_state.config) + energy = calc_state.model.energy(calc_state.config) + result = [energy_internal, energy] + if self.minE is None or energy < self.minE: + self.minE = energy + with open("minEfi.dat", "a") as f: + f.write(str(self.minE) + "\n") + structure.to(fmt="POSCAR", filename="minE.vasp") + else: + result = [] for calculator in self.calculators: name = calculator["name"] diff --git a/abics/applications/latgas_abinitio_interface/model_setup.py b/abics/applications/latgas_abinitio_interface/model_setup.py index 22d15f31..7fff35d6 100644 --- a/abics/applications/latgas_abinitio_interface/model_setup.py +++ b/abics/applications/latgas_abinitio_interface/model_setup.py @@ -1317,7 +1317,7 @@ def reset_from_structure(self, st_in): st = map2perflat(st, st_in) st.remove_species(["X"]) - for defect_sublattice in self.defect_sublattices: + for defect_sublattice in self.defect_sublattices: # Extract group information for this sublattice d_sp2grp = {} sp = set() @@ -1359,6 +1359,7 @@ def reset_from_structure(self, st_in): raise InputError( "initial structure violates constraints specified by constraint_func" ) + self.structure_norel = copy.deepcopy(self.structure) def dummy_structure(self): """ diff --git a/abics/sampling/pamc.py b/abics/sampling/pamc.py index 2da50a22..42dd800a 100644 --- a/abics/sampling/pamc.py +++ b/abics/sampling/pamc.py @@ -35,6 +35,7 @@ logger = logging.getLogger("main") + class PAMCParams: """Parameter set for population annealing Monte Carlo @@ -193,7 +194,7 @@ def __init__( self.use_resample_old = False def reload(self): - # not supported yet + # not supported yet pass # self.mycalc.config = pickle_load(os.path.join(str(self.rank), "calc.pickle")) # self.obs_save0 = numpy_load(os.path.join(str(self.rank), "obs_save.npy")) @@ -220,6 +221,7 @@ def save(self, save_obs: bool): obs_save_ = np.array(self.obs_save) numpy_save(obs_save_, "obs_save.npy") numpy_save(self.logweight_history, "logweight_hist.npy") + numpy_save(self.dlogz, "dlogz.npy") numpy_save(self.kT_history, "kT_hist.npy") def anneal(self, energy: float): @@ -330,9 +332,9 @@ def run( kTnum = len(self.kTs) nba = nsteps // kTnum self.nsteps_between_anneal = nba * np.ones(kTnum, dtype=int) - if nsteps - nba*kTnum > 0: - self.nsteps_between_anneal[-(nsteps - nba*kTnum):] += 1 - self.isamples_offsets = np.zeros(kTnum+1, dtype=int) + if nsteps - nba * kTnum > 0: + self.nsteps_between_anneal[-(nsteps - nba * kTnum) :] += 1 + self.isamples_offsets = np.zeros(kTnum + 1, dtype=int) if subdirs: try: @@ -341,7 +343,7 @@ def run( pass os.chdir(str(self.rank)) if not self.Lreload: - #self.mycalc.config.shuffle() + # self.mycalc.config.shuffle() self.mycalc.energy = self.mycalc.model.energy(self.mycalc.config) self.Tindex = 0 self.anneal(self.mycalc.energy) @@ -356,7 +358,11 @@ def run( while self.Tindex < numT: if self.Tindex > 0: self.anneal(self.mycalc.energy) - logger.info("--Anneal from T={} to {}".format(self.kTs[self.Tindex - 1], self.kTs[self.Tindex])) + logger.info( + "--Anneal from T={} to {}".format( + self.kTs[self.Tindex - 1], self.kTs[self.Tindex] + ) + ) if self.Tindex % resample_frequency == 0: self.resample() logger.info("--Resampling finishes") @@ -407,93 +413,115 @@ def run( os.chdir("../") def postproc(self): - nT = len(self.kTs) - obs_arr = np.array(self.obs_save) - nobs = obs_arr.shape[1] - - obs1 = np.zeros((nT, nobs)) - obs2 = np.zeros((nT, nobs)) - logweights = np.zeros(nT) - dlogz = np.array(self.dlogz) - - for i, (offset, offset2) in enumerate(zip(self.isamples_offsets[:-1], - self.isamples_offsets[1:])): - logweights[i] = self.logweight_history[offset] - o_a = obs_arr[offset:offset2, :] - obs1[i, :] += o_a.mean(axis=0) - obs2[i, :] += (o_a*o_a).mean(axis=0) - - obs = np.zeros((nT, 2 * nobs), dtype=np.float64) - for iobs in range(nobs): - obs[:, 2 * iobs] = obs1[:, iobs] - obs[:, 2 * iobs + 1] = obs2[:, iobs] - - nreplicas = self.procs - buffer_all = np.zeros((nreplicas, nT, 2 + 2 * nobs), dtype=np.float64) - self.comm.Allgather( - np.concatenate( - [logweights.reshape(-1, 1), dlogz.reshape(-1, 1), obs], axis=1 - ), - buffer_all, + kTs = self.kTs + obs_save = self.obs_save + dlogz = self.dlogz + logweight_history = self.logweight_history + obsnames = self.obsnames + isamples_offsets = self.isamples_offsets + comm = self.comm + postproc( + kTs, obs_save, dlogz, logweight_history, obsnames, isamples_offsets, comm ) - logweights_all = buffer_all[:, :, 0] - dlogz_all = buffer_all[:, :, 1] - obs_all = buffer_all[:, :, 2:] - - logweights_max = logweights_all.max(axis=0) - weights = np.exp(logweights_all - logweights_max) - ow = np.einsum("ito,it->ito", obs_all, weights) - - lzw = logweights_all + dlogz_all - logweights_max - lzw_max = lzw.max(axis=0) - zw = np.exp(lzw - lzw_max) - - # bootstrap - index = np.random.randint(nreplicas, size=nreplicas) - numer = ow[index, :, :].mean(axis=0) - zw_numer = zw[index, :].mean(axis=0) - denom = weights[index, :].mean(axis=0) - o = np.zeros((nT, 3 * nobs + 1), dtype=np.float64) - for iobs in range(nobs): - o[:, 3 * iobs] = numer[:, 2 * iobs] / denom[:] - o[:, 3 * iobs + 1] = numer[:, 2 * iobs + 1] / denom[:] - o[:, 3 * iobs + 2] = o[:, 3 * iobs + 1] - o[:, 3 * iobs] ** 2 - o[:, 3 * nobs] = zw_numer / denom - o_all = np.zeros((nreplicas, nT, 3 * nobs + 1), dtype=np.float64) - self.comm.Allgather(o, o_all) - if self.rank == 0: - o_mean = o_all.mean(axis=0) - o_err = o_all.std(axis=0) - for iobs, oname in enumerate(self.obsnames): - with open(f"{oname}.dat", "w") as f: - f.write( "# $1: temperature\n") - f.write(f"# $2: <{oname}>\n") - f.write(f"# $3: ERROR of <{oname}>\n") - f.write(f"# $4: <{oname}^2>\n") - f.write(f"# $5: ERROR of <{oname}^2>\n") - f.write(f"# $6: <{oname}^2> - <{oname}>^2\n") - f.write(f"# $7: ERROR of <{oname}^2> - <{oname}>^2\n") - - for iT in range(nT): - f.write(f"{self.kTs[iT]}") - for j in range(3): - f.write(f" {o_mean[iT, 3*iobs+j]} {o_err[iT, 3*iobs+j]}") - f.write("\n") - dlogZ = np.log(o_mean[:, 3 * nobs]) + lzw_max[:] - dlogZ_err = o_err[:, 3 * nobs] / o_mean[:, 3 * nobs] - with open("logZ.dat", "w") as f: - f.write("# $1: temperature\n") - f.write("# $2: logZ\n") - f.write("# $3: ERROR of log(Z)\n") - f.write("# $4: log(Z/Z')\n") - f.write("# $5: ERROR of log(Z/Z')\n") - F = 0.0 - dF = 0.0 - f.write(f"inf 0.0 0.0 0.0 0.0\n") - for i, (dlz, dlz_e) in enumerate(zip(dlogZ, dlogZ_err)): - F += dlz - dF += dlz_e - f.write(f"{self.kTs[i]} {F} {dF} {dlz} {dlz_e}\n") - self.comm.Barrier() +def postproc( + kTs, + obs_save, + dlogz, + logweight_history, + obsnames, + isamples_offsets, + comm, +): + procs = comm.Get_size() + rank = comm.Get_rank() + + nT = len(kTs) + obs_arr = np.array(obs_save) + nobs = obs_arr.shape[1] + + obs1 = np.zeros((nT, nobs)) + obs2 = np.zeros((nT, nobs)) + logweights = np.zeros(nT) + dlogz = np.array(dlogz) + + for i, (offset, offset2) in enumerate( + zip(isamples_offsets[:-1], isamples_offsets[1:]) + ): + logweights[i] = logweight_history[offset] + o_a = obs_arr[offset:offset2, :] + obs1[i, :] += o_a.mean(axis=0) + obs2[i, :] += (o_a * o_a).mean(axis=0) + + obs = np.zeros((nT, 2 * nobs), dtype=np.float64) + for iobs in range(nobs): + obs[:, 2 * iobs] = obs1[:, iobs] + obs[:, 2 * iobs + 1] = obs2[:, iobs] + + nreplicas = procs + buffer_all = np.zeros((nreplicas, nT, 2 + 2 * nobs), dtype=np.float64) + comm.Allgather( + np.concatenate([logweights.reshape(-1, 1), dlogz.reshape(-1, 1), obs], axis=1), + buffer_all, + ) + + logweights_all = buffer_all[:, :, 0] + dlogz_all = buffer_all[:, :, 1] + obs_all = buffer_all[:, :, 2:] + + logweights_max = logweights_all.max(axis=0) + weights = np.exp(logweights_all - logweights_max) + ow = np.einsum("ito,it->ito", obs_all, weights) + + lzw = logweights_all + dlogz_all - logweights_max + lzw_max = lzw.max(axis=0) + zw = np.exp(lzw - lzw_max) + + # bootstrap + index = np.random.randint(nreplicas, size=nreplicas) + numer = ow[index, :, :].mean(axis=0) + zw_numer = zw[index, :].mean(axis=0) + denom = weights[index, :].mean(axis=0) + o = np.zeros((nT, 3 * nobs + 1), dtype=np.float64) + for iobs in range(nobs): + o[:, 3 * iobs] = numer[:, 2 * iobs] / denom[:] + o[:, 3 * iobs + 1] = numer[:, 2 * iobs + 1] / denom[:] + o[:, 3 * iobs + 2] = o[:, 3 * iobs + 1] - o[:, 3 * iobs] ** 2 + o[:, 3 * nobs] = zw_numer / denom + o_all = np.zeros((nreplicas, nT, 3 * nobs + 1), dtype=np.float64) + comm.Allgather(o, o_all) + if rank == 0: + o_mean = o_all.mean(axis=0) + o_err = o_all.std(axis=0) + for iobs, oname in enumerate(obsnames): + with open(f"{oname}.dat", "w") as f: + f.write("# $1: temperature\n") + f.write(f"# $2: <{oname}>\n") + f.write(f"# $3: ERROR of <{oname}>\n") + f.write(f"# $4: <{oname}^2>\n") + f.write(f"# $5: ERROR of <{oname}^2>\n") + f.write(f"# $6: <{oname}^2> - <{oname}>^2\n") + f.write(f"# $7: ERROR of <{oname}^2> - <{oname}>^2\n") + + for iT in range(nT): + f.write(f"{kTs[iT]}") + for j in range(3): + f.write(f" {o_mean[iT, 3*iobs+j]} {o_err[iT, 3*iobs+j]}") + f.write("\n") + dlogZ = np.log(o_mean[:, 3 * nobs]) + lzw_max[:] + dlogZ_err = o_err[:, 3 * nobs] / o_mean[:, 3 * nobs] + with open("logZ.dat", "w") as f: + f.write("# $1: temperature\n") + f.write("# $2: logZ\n") + f.write("# $3: ERROR of log(Z)\n") + f.write("# $4: log(Z/Z')\n") + f.write("# $5: ERROR of log(Z/Z')\n") + F = 0.0 + dF = 0.0 + f.write(f"inf 0.0 0.0 0.0 0.0\n") + for i, (dlz, dlz_e) in enumerate(zip(dlogZ, dlogZ_err)): + F += dlz + dF += dlz_e + f.write(f"{kTs[i]} {F} {dF} {dlz} {dlz_e}\n") + comm.Barrier() diff --git a/abics/sampling/rxmc.py b/abics/sampling/rxmc.py index b11f1bcb..259e5a00 100644 --- a/abics/sampling/rxmc.py +++ b/abics/sampling/rxmc.py @@ -313,15 +313,15 @@ def run( os.chdir(str(self.rank)) self.accept_count = 0 if not self.Lreload: - #self.mycalc.config.shuffle() + # self.mycalc.config.shuffle() self.mycalc.energy = self.mycalc.model.energy(self.mycalc.config) with open(os.devnull, "w") as f: test_observe = observer.observe(self.mycalc, f, lprint=False) obs_len = len(test_observe) obs = np.zeros([len(self.kTs), obs_len]) - #with open("obs.dat", "a") as output: + # with open("obs.dat", "a") as output: # obs_step = observer.observe(self.mycalc, output, True) - + nsample = 0 XCscheme = 0 with open("obs.dat", "a") as output: @@ -403,116 +403,10 @@ def save(self, save_obs: bool, subdirs: bool): def postproc(self, throw_out: int | float): assert throw_out >= 0 obs_save, Trank_hist, kT_hist = self.__merge_obs() - nsteps, nobs = obs_save.shape - nT = self.rank_to_T.size - if isinstance(throw_out, float): - throw_out = int(nsteps * throw_out) - - # gather observables with same temperature (Trank == myrank) - recv_obss = [] - start = throw_out - buffer_size = 1000 - while start < nsteps: - end = start + buffer_size - if end > nsteps: - end = nsteps - obs = [[] for _ in range(nT)] - for istep in range(start, end): - iT = Trank_hist[istep] - obs[iT].append(obs_save[istep, :]) - start = end - send_obs = [] - for rank in range(self.comm.size): - if len(obs[rank]) > 0: - send = np.array(obs[rank]) - else: - send = np.zeros((0,nobs)) - send_obs.append(send) - recv_obs = self.comm.alltoall(send_obs) - for ro in recv_obs: - recv_obss.append(ro) - X = np.concatenate(recv_obss) # nsamples X nobs - nsamples = X.shape[0] - - # jackknife method - X2 = X**2 - X_mean = X.mean(axis=0) - X_err = np.sqrt(X.var(axis=0, ddof=1) / (nsamples - 1)) - X_jk = self.__jackknife(X) - X2_mean = X2.mean(axis=0) - X2_err = np.sqrt(X2.var(axis=0, ddof=1) / (nsamples - 1)) - X2_jk = self.__jackknife(X2) - F = X2.mean(axis=0) - X.mean(axis=0) ** 2 # F stands for Fluctuation - F_jk = X2_jk - X_jk**2 - F_mean = F_jk.sum(axis=0) - F_jk.mean(axis=0) * (nsamples - 1) - F_err = np.sqrt((nsamples - 1) * F_jk.var(axis=0, ddof=0)) - - # estimate free energy - target_T = -1.0 - if self.kTs[0] <= self.kTs[-1]: - if self.rank > 0: - target_T = self.kTs[self.rank - 1] - else: - if self.rank < self.comm.size - 1: - target_T = self.kTs[self.rank + 1] - if target_T >= 0.0: - dbeta = 1.0 / target_T - 1.0 / self.kTs[self.rank] - energies = X[:, 0] - emin = energies.min() - Exp = np.exp(-dbeta * (energies - emin)) - Exp_jk = self.__jackknife(Exp) - Logz_jk = np.log(Exp_jk) - Logz_mean = Logz_jk.sum() - Logz_jk.mean() * (nsamples - 1) - dbeta * emin - Logz_err = np.sqrt((nsamples - 1) * Logz_jk.var(ddof=0)) - else: - Logz_mean = 0.0 - Logz_err = 0.0 - - obs = np.array([X_mean, X_err, X2_mean, X2_err, F_mean, F_err]) - obs_all = np.zeros([self.comm.size, *obs.shape]) # nT X nobs X ntype - self.comm.Allgather(obs, obs_all) - dlogz = self.comm.allgather([Logz_mean, Logz_err]) # nT X 2 - - if self.rank == 0: - ntype = obs.shape[0] - for iobs, oname in enumerate(self.obsnames): - with open(f"{oname}.dat", "w") as f: - f.write( "# $1: temperature\n") - f.write(f"# $2: <{oname}>\n") - f.write(f"# $3: ERROR of <{oname}>\n") - f.write(f"# $4: <{oname}^2>\n") - f.write(f"# $5: ERROR of <{oname}^2>\n") - f.write(f"# $6: <{oname}^2> - <{oname}>^2\n") - f.write(f"# $7: ERROR of <{oname}^2> - <{oname}>^2\n") - for iT in range(nT): - f.write(f"{self.kTs[iT]}") - for itype in range(ntype): - f.write(f" {obs_all[iT, itype, iobs]}") - f.write("\n") - - with open("logZ.dat", "w") as f: - f.write("# $1: temperature\n") - f.write("# $2: logZ\n") - f.write("# $3: ERROR of log(Z)\n") - f.write("# $4: log(Z/Z')\n") - f.write("# $5: ERROR of log(Z/Z')\n") - F = 0.0 - dF = 0.0 - if self.kTs[0] <= self.kTs[-1]: - f.write(f"{self.kTs[-1]} {F} {dF} {0.0} {0.0}\n") - for iT in np.arange(1, nT)[::-1]: - dlz, dlz_e = dlogz[iT] - F += dlz - dF += dlz_e - f.write(f"{self.kTs[iT-1]} {F} {dF} {dlz} {dlz_e}\n") - else: - f.write(f"{self.kTs[0]} {F} {dF} {0.0} {0.0}\n") - for iT in np.arange(0, nT-1): - dlz, dlz_e = dlogz[iT] - F += dlz - dF += dlz_e - f.write(f"{self.kTs[iT+1]} {F} {dF} {dlz} {dlz_e}\n") - self.comm.Barrier() + kTs = self.kTs + comm = self.comm + obsnames = self.obsnames + postproc(obs_save, Trank_hist, kT_hist, kTs, comm, obsnames, throw_out) def __merge_obs(self): if hasattr(self, "obs_save0"): @@ -525,8 +419,125 @@ def __merge_obs(self): kT_hist_ = np.array(self.kT_hist) return obs_save_, Trank_hist_, kT_hist_ - def __jackknife(self, X: np.ndarray) -> np.ndarray: - nsamples = X.shape[0] - S = X.sum(axis=0) - X_jk = (S - X) / (nsamples - 1) - return X_jk +def jackknife(X: np.ndarray) -> np.ndarray: + nsamples = X.shape[0] + S = X.sum(axis=0) + X_jk = (S - X) / (nsamples - 1) + return X_jk + + +def postproc(obs_save, Trank_hist, kT_hist, kTs, comm, + obsnames, throw_out: int | float): + assert throw_out >= 0 + rank = comm.Get_rank() + nT = comm.Get_size() + nsteps, nobs = obs_save.shape + # nT = rank_to_T.size + if isinstance(throw_out, float): + throw_out = int(nsteps * throw_out) + + # gather observables with same temperature (Trank == myrank) + recv_obss = [] + start = throw_out + buffer_size = 1000 + while start < nsteps: + end = start + buffer_size + if end > nsteps: + end = nsteps + obs = [[] for _ in range(nT)] + for istep in range(start, end): + iT = Trank_hist[istep] + obs[iT].append(obs_save[istep, :]) + start = end + send_obs = [] + for rk in range(comm.size): + if len(obs[rk]) > 0: + send = np.array(obs[rk]) + else: + send = np.zeros((0, nobs)) + send_obs.append(send) + recv_obs = comm.alltoall(send_obs) + for ro in recv_obs: + recv_obss.append(ro) + X = np.concatenate(recv_obss) # nsamples X nobs + nsamples = X.shape[0] + + # jackknife method + X2 = X**2 + X_mean = X.mean(axis=0) + X_err = np.sqrt(X.var(axis=0, ddof=1) / (nsamples - 1)) + X_jk = jackknife(X) + X2_mean = X2.mean(axis=0) + X2_err = np.sqrt(X2.var(axis=0, ddof=1) / (nsamples - 1)) + X2_jk = jackknife(X2) + F = X2.mean(axis=0) - X.mean(axis=0) ** 2 # F stands for Fluctuation + F_jk = X2_jk - X_jk**2 + F_mean = F_jk.sum(axis=0) - F_jk.mean(axis=0) * (nsamples - 1) + F_err = np.sqrt((nsamples - 1) * F_jk.var(axis=0, ddof=0)) + + # estimate free energy + target_T = -1.0 + if kTs[0] <= kTs[-1]: + if rank > 0: + target_T = kTs[rank - 1] + else: + if rank < comm.size - 1: + target_T = kTs[rank + 1] + if target_T >= 0.0: + dbeta = 1.0 / target_T - 1.0 / kTs[rank] + energies = X[:, 0] + emin = energies.min() + Exp = np.exp(-dbeta * (energies - emin)) + Exp_jk = jackknife(Exp) + Logz_jk = np.log(Exp_jk) + Logz_mean = Logz_jk.sum() - Logz_jk.mean() * (nsamples - 1) - dbeta * emin + Logz_err = np.sqrt((nsamples - 1) * Logz_jk.var(ddof=0)) + else: + Logz_mean = 0.0 + Logz_err = 0.0 + + obs = np.array([X_mean, X_err, X2_mean, X2_err, F_mean, F_err]) + obs_all = np.zeros([comm.size, *obs.shape]) # nT X nobs X ntype + comm.Allgather(obs, obs_all) + dlogz = comm.allgather([Logz_mean, Logz_err]) # nT X 2 + + if rank == 0: + ntype = obs.shape[0] + for iobs, oname in enumerate(obsnames): + with open(f"{oname}.dat", "w") as f: + f.write("# $1: temperature\n") + f.write(f"# $2: <{oname}>\n") + f.write(f"# $3: ERROR of <{oname}>\n") + f.write(f"# $4: <{oname}^2>\n") + f.write(f"# $5: ERROR of <{oname}^2>\n") + f.write(f"# $6: <{oname}^2> - <{oname}>^2\n") + f.write(f"# $7: ERROR of <{oname}^2> - <{oname}>^2\n") + for iT in range(nT): + f.write(f"{kTs[iT]}") + for itype in range(ntype): + f.write(f" {obs_all[iT, itype, iobs]}") + f.write("\n") + + with open("logZ.dat", "w") as f: + f.write("# $1: temperature\n") + f.write("# $2: logZ\n") + f.write("# $3: ERROR of log(Z)\n") + f.write("# $4: log(Z/Z')\n") + f.write("# $5: ERROR of log(Z/Z')\n") + F = 0.0 + dF = 0.0 + if kTs[0] <= kTs[-1]: + f.write(f"{kTs[-1]} {F} {dF} {0.0} {0.0}\n") + for iT in np.arange(1, nT)[::-1]: + dlz, dlz_e = dlogz[iT] + F += dlz + dF += dlz_e + f.write(f"{kTs[iT-1]} {F} {dF} {dlz} {dlz_e}\n") + else: + f.write(f"{kTs[0]} {F} {dF} {0.0} {0.0}\n") + for iT in np.arange(0, nT - 1): + dlz, dlz_e = dlogz[iT] + F += dlz + dF += dlz_e + f.write(f"{kTs[iT+1]} {F} {dF} {dlz} {dlz_e}\n") + comm.Barrier() diff --git a/abics/sampling/simple_parallel.py b/abics/sampling/simple_parallel.py index 2dd66ad3..622381e9 100644 --- a/abics/sampling/simple_parallel.py +++ b/abics/sampling/simple_parallel.py @@ -28,6 +28,7 @@ from abics.observer import ObserverBase from abics.sampling.mc import MCAlgorithm, verylargeint, write_obs_header from abics.sampling.mc_mpi import ParallelMC +from abics.sampling.rxmc import jackknife from abics.util import pickle_dump, pickle_load, numpy_save, numpy_load @@ -61,6 +62,8 @@ def __init__(self): self.print_frequency = 1 self.reload = False self.seed = 0 + self.throw_out = 0.5 + self.kT = 0.0 @classmethod def from_dict(cls, d): @@ -87,6 +90,8 @@ def from_dict(cls, d): params.print_frequency = d.get("print_frequency", 1) params.reload = d.get("reload", False) params.seed = d.get("seed", 0) + params.throw_out = d.get("throw_out", 0.5) + params.kT = d.get("kT", 0.0) return params @classmethod @@ -145,6 +150,7 @@ def __init__(self): self.print_frequency = 1 self.reload = False self.seed = 0 + self.throw_out = 0.5 @classmethod def from_dict(cls, d): @@ -173,6 +179,7 @@ def from_dict(cls, d): params.print_frequency = d.get("print_frequency", 1) params.reload = d.get("reload", False) params.seed = d.get("seed", 0) + params.throw_out = d.get("throw_out", 0.5) return params @classmethod @@ -225,6 +232,8 @@ def __init__( self.procs = self.comm.Get_size() if kTs is None: kTs = [0] * self.procs + if isinstance(kTs, (int, float)): + kTs = [kTs] * self.procs self.kTs = kTs self.model = model self.nreplicas = len(configs) @@ -261,6 +270,7 @@ def run( sample_frequency: int = verylargeint, print_frequency: int = verylargeint, nsubsteps_in_step: int = 1, + throw_out: int | float = 0.5, observer: ObserverBase = ObserverBase(), subdirs: bool = True, save_obs: bool = True, @@ -293,6 +303,8 @@ def run( except FileExistsError: pass os.chdir(str(self.rank)) + + self.obsnames = observer.names self.accept_count = 0 if not self.Lreload: self.mycalc.energy = self.mycalc.model.energy(self.mycalc.config) @@ -353,6 +365,9 @@ def run( if subdirs: os.chdir("../") + if save_obs: + self.postproc(throw_out) + if nsample != 0: obs = np.array(obs) obs_buffer = np.empty(obs.shape) @@ -365,6 +380,9 @@ def run( return obs_list + def postproc(self, throw_out): + postproc(obs_save=np.array(self.obs_save), kTs=self.kTs, comm=self.comm, obsnames=self.obsnames, throw_out=throw_out) + class RandomSampling_MPI(ParallelMC): def __init__( self, comm, MCalgo: type[MCAlgorithm], model: Model, configs, write_node=True @@ -394,3 +412,52 @@ def __init__( self.Trank_hist = [] self.kT_hist = [] self.write_node = write_node + + +def postproc(obs_save, kTs, comm, + obsnames, throw_out: int | float): + assert throw_out >= 0 + rank = comm.Get_rank() + nT = comm.Get_size() + nsteps, nobs = obs_save.shape + # nT = rank_to_T.size + if isinstance(throw_out, float): + throw_out = int(nsteps * throw_out) + + X = obs_save[throw_out:, :] + nsamples = X.shape[0] + + # jackknife method + X2 = X**2 + X_mean = X.mean(axis=0) + X_err = np.sqrt(X.var(axis=0, ddof=1) / (nsamples - 1)) + X_jk = jackknife(X) + X2_mean = X2.mean(axis=0) + X2_err = np.sqrt(X2.var(axis=0, ddof=1) / (nsamples - 1)) + X2_jk = jackknife(X2) + F = X2.mean(axis=0) - X.mean(axis=0) ** 2 # F stands for Fluctuation + F_jk = X2_jk - X_jk**2 + F_mean = F_jk.sum(axis=0) - F_jk.mean(axis=0) * (nsamples - 1) + F_err = np.sqrt((nsamples - 1) * F_jk.var(axis=0, ddof=0)) + + obs = np.array([X_mean, X_err, X2_mean, X2_err, F_mean, F_err]) + obs_all = np.zeros([comm.size, *obs.shape]) # nT X ntype X nobs + comm.Allgather(obs, obs_all) + + if rank == 0: + ntype = obs.shape[0] + for iobs, oname in enumerate(obsnames): + with open(f"{oname}.dat", "w") as f: + f.write("# $1: temperature\n") + f.write(f"# $2: <{oname}>\n") + f.write(f"# $3: ERROR of <{oname}>\n") + f.write(f"# $4: <{oname}^2>\n") + f.write(f"# $5: ERROR of <{oname}^2>\n") + f.write(f"# $6: <{oname}^2> - <{oname}>^2\n") + f.write(f"# $7: ERROR of <{oname}^2> - <{oname}>^2\n") + for iT in range(nT): + f.write(f"{kTs[iT]}") + for itype in range(ntype): + f.write(f" {obs_all[iT, itype, iobs]}") + f.write("\n") + comm.Barrier() diff --git a/abics/scripts/main_dft_latgas.py b/abics/scripts/main_dft_latgas.py index 092fef32..2397a313 100644 --- a/abics/scripts/main_dft_latgas.py +++ b/abics/scripts/main_dft_latgas.py @@ -31,7 +31,10 @@ from abics.sampling.mc_mpi import RX_MPI_init from abics.sampling.rxmc import TemperatureRX_MPI, RXParams from abics.sampling.pamc import PopulationAnnealing, PAMCParams -from abics.sampling.simple_parallel import EmbarrassinglyParallelSampling, ParallelRandomParams +from abics.sampling.simple_parallel import ( + EmbarrassinglyParallelSampling, + ParallelRandomParams, +) from abics.applications.latgas_abinitio_interface.default_observer import ( DefaultObserver, @@ -50,14 +53,19 @@ RunnerEnsemble, RunnerMultistep, ) -from abics.applications.latgas_abinitio_interface.base_solver import SolverBase, create_solver +from abics.applications.latgas_abinitio_interface.base_solver import ( + SolverBase, + create_solver, +) from abics.applications.latgas_abinitio_interface.params import DFTParams from abics.util import exists_on_all_nodes import logging + logger = logging.getLogger("main") + def main_dft_latgas(params_root: MutableMapping): params_sampling = params_root.get("sampling", None) if not params_sampling: @@ -75,7 +83,9 @@ def main_dft_latgas(params_root: MutableMapping): kB = constants.value("Boltzmann constant in eV/K") nensemble = len(dftparams.base_input_dir) - comm, commEnsemble, commAll = RX_MPI_init(rxparams.nreplicas, rxparams.seed, nensemble) + comm, commEnsemble, commAll = RX_MPI_init( + rxparams.nreplicas, rxparams.seed, nensemble + ) # RXMC parameters # specify temperatures for each replica, number of steps, etc. @@ -102,7 +112,9 @@ def main_dft_latgas(params_root: MutableMapping): kB = constants.value("Boltzmann constant in eV/K") nensemble = len(dftparams.base_input_dir) - comm, commEnsemble, commAll = RX_MPI_init(pamcparams.nreplicas, pamcparams.seed, nensemble) + comm, commEnsemble, commAll = RX_MPI_init( + pamcparams.nreplicas, pamcparams.seed, nensemble + ) # RXMC parameters # specify temperatures for each replica, number of steps, etc. @@ -124,18 +136,20 @@ def main_dft_latgas(params_root: MutableMapping): logger.info(f"--Anneal from {kTstart} K to {kTend} K") elif sampler_type == "parallelRand": - rxparams = ParallelRandomParams.from_dict(params_sampling) - nreplicas = rxparams.nreplicas - nprocs_per_replica = rxparams.nprocs_per_replica + prparams = ParallelRandomParams.from_dict(params_sampling) + nreplicas = prparams.nreplicas + nprocs_per_replica = prparams.nprocs_per_replica nensemble = len(dftparams.base_input_dir) - comm, commEnsemble, commAll = RX_MPI_init(rxparams.nreplicas, rxparams.seed, nensemble) + comm, commEnsemble, commAll = RX_MPI_init( + prparams.nreplicas, prparams.seed, nensemble + ) # Set Lreload to True when restarting - Lreload = rxparams.reload + Lreload = prparams.reload - nsteps = rxparams.nsteps - sample_frequency = rxparams.sample_frequency - print_frequency = rxparams.print_frequency + nsteps = prparams.nsteps + sample_frequency = prparams.sample_frequency + print_frequency = prparams.print_frequency logger.info(f"-Running parallel random sampling") elif sampler_type == "parallelMC": @@ -146,7 +160,9 @@ def main_dft_latgas(params_root: MutableMapping): kB = constants.value("Boltzmann constant in eV/K") nensemble = len(dftparams.base_input_dir) - comm, commEnsemble, commAll = RX_MPI_init(rxparams.nreplicas, rxparams.seed, nensemble) + comm, commEnsemble, commAll = RX_MPI_init( + rxparams.nreplicas, rxparams.seed, nensemble + ) # RXMC parameters # specify temperatures for each replica, number of steps, etc. @@ -166,14 +182,15 @@ def main_dft_latgas(params_root: MutableMapping): logger.error("Unknown sampler. Exiting...") sys.exit(1) - solvers = [] for i in range(len(dftparams.base_input_dir)): solver: SolverBase = create_solver(dftparams.solver, dftparams) solvers.append(solver) - + logger.info(f"-Setting up {dftparams.solver} solver for configuration energies") - logger.info("--Base input is taken from {}".format(",".join(dftparams.base_input_dir))) + logger.info( + "--Base input is taken from {}".format(",".join(dftparams.base_input_dir)) + ) # model setup # we first choose a "model" defining how to perform energy calculations and trial steps @@ -181,7 +198,9 @@ def main_dft_latgas(params_root: MutableMapping): energy_calculator: Union[Runner, RunnerEnsemble, RunnerMultistep] if dftparams.ensemble: if len(dftparams.base_input_dir) == 1: - logger.error("You must specify more than one base_input_dir for ensemble calculator") + logger.error( + "You must specify more than one base_input_dir for ensemble calculator" + ) sys.exit(1) energy_calculator = RunnerEnsemble( base_input_dirs=dftparams.base_input_dir, @@ -216,18 +235,19 @@ def main_dft_latgas(params_root: MutableMapping): use_tmpdir=dftparams.use_tmpdir, ) - gc_flag = params_sampling.get("enable_grandcanonical", False) + gc_flag = params_sampling.get("enable_grandcanonical", False) gc_ratio = params_sampling.get("gc_ratio", 0.3) - model = DFTLatticeGas(energy_calculator, - save_history=False, - enable_grandcanonical=gc_flag, - gc_ratio=gc_ratio, + model = DFTLatticeGas( + energy_calculator, + save_history=False, + enable_grandcanonical=gc_flag, + gc_ratio=gc_ratio, ) logger.info("--Success.") logger.info("-Setting up the on-lattice model.") - + configparams = DFTConfigParams.from_dict(params_root["config"]) spinel_config = defect_config(configparams) @@ -268,7 +288,9 @@ def main_dft_latgas(params_root: MutableMapping): ) for base_input_dir in ensembleparams.base_input_dirs ] - observer: DefaultObserver = EnsembleErrorObserver(commEnsemble, energy_calculators, Lreload) + observer: DefaultObserver = EnsembleErrorObserver( + commEnsemble, energy_calculators, Lreload + ) else: observer = DefaultObserver(comm, Lreload, params_observer) @@ -299,7 +321,9 @@ def main_dft_latgas(params_root: MutableMapping): logger.info(f"--MC sampling will be run in MC{i}") os.mkdir("MC{}".format(i)) if dftparams.use_tmpdir: - logger.info(f"---Will use local tmpdir for {dftparams.solver} run") + logger.info( + f"---Will use local tmpdir for {dftparams.solver} run" + ) # backup baseinput for this AL step for j, d in enumerate(dftparams.base_input_dir): shutil.copytree(d, "MC{}/baseinput{}".format(i, j)) @@ -333,7 +357,7 @@ def main_dft_latgas(params_root: MutableMapping): RXcalc.reload() logger.info("-Starting RXMC calculation") - + obs = RXcalc.run( nsteps, RXtrial_frequency, @@ -341,6 +365,7 @@ def main_dft_latgas(params_root: MutableMapping): print_frequency=print_frequency, observer=observer, subdirs=True, + throw_out=rxparams.throw_out, ) elif sampler_type == "PAMC": @@ -353,7 +378,7 @@ def main_dft_latgas(params_root: MutableMapping): PAcalc.reload() logger.info("-Starting PAMC calculation") - + obs = PAcalc.run( nsteps, resample_frequency, @@ -365,7 +390,7 @@ def main_dft_latgas(params_root: MutableMapping): elif sampler_type == "parallelRand": calc = EmbarrassinglyParallelSampling( - comm, RandomSampling, model, configs, write_node=write_node + comm, RandomSampling, model, configs, prparams.kT, write_node=write_node ) if Lreload: calc.reload() @@ -389,6 +414,7 @@ def main_dft_latgas(params_root: MutableMapping): print_frequency=print_frequency, observer=observer, subdirs=True, + throw_out=rxparams.throw_out, ) logger.info("--Sampling completed sucessfully.") diff --git a/abics/scripts/postproc.py b/abics/scripts/postproc.py new file mode 100644 index 00000000..c4e76322 --- /dev/null +++ b/abics/scripts/postproc.py @@ -0,0 +1,61 @@ +# ab-Initio Configuration Sampling tool kit (abICS) +# Copyright (C) 2019- The University of Tokyo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +from typing import MutableMapping + +import sys +import datetime + +import toml + +from abics import __version__ + +import logging +import abics.loggers as loggers +# from pathlib import Path + +logger = logging.getLogger("main") + +def main_impl(params: MutableMapping): + solver_type = params["sampling"]["solver"]["type"] + if solver_type == "potts": + raise NotImplementedError("Potts solver is not implemented yet.") + else: + import abics.scripts.postproc_dft_latgas + abics.scripts.postproc_dft_latgas.postproc_dft_latgas(params) + + +def main(): + now = datetime.datetime.now() + + tomlfile = sys.argv[1] if len(sys.argv) > 1 else "input.toml" + params = toml.load(tomlfile) + + loggers.set_log_handles( + app_name = "postproc", + level = logging.INFO, + logfile_path = None, + logfile_mode = "master", + params=params.get("log", {})) + + logger.info(f"Running abics_postproc (abICS v{__version__}) on {now}") + logger.info("-Reading input from: {}".format(tomlfile)) + + main_impl(params) + + +if __name__ == "__main__": + main() diff --git a/abics/scripts/postproc_dft_latgas.py b/abics/scripts/postproc_dft_latgas.py new file mode 100644 index 00000000..cda40cd7 --- /dev/null +++ b/abics/scripts/postproc_dft_latgas.py @@ -0,0 +1,435 @@ +# ab-Initio Configuration Sampling tool kit (abICS) +# Copyright (C) 2019- The University of Tokyo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +from typing import MutableMapping, Union + +from mpi4py import MPI +import copy +import sys, os, shutil +import datetime + +import numpy as np +import scipy.constants as constants + +from pymatgen.core import Structure + +from abics import __version__ +from abics.mc import CanonicalMonteCarlo, WeightedCanonicalMonteCarlo, RandomSampling +from abics.observer import ObserverParams + +import abics.sampling.simple_parallel as simple_parallel +import abics.sampling.rxmc as rxmc +import abics.sampling.pamc as pamc + +from abics.sampling.mc_mpi import RX_MPI_init +from abics.sampling.rxmc import TemperatureRX_MPI, RXParams +from abics.sampling.pamc import PopulationAnnealing, PAMCParams +from abics.sampling.simple_parallel import ( + EmbarrassinglyParallelSampling, + ParallelRandomParams, +) + +from abics.applications.latgas_abinitio_interface.default_observer import ( + DefaultObserver, + EnsembleParams, + EnsembleErrorObserver, +) +from abics.applications.latgas_abinitio_interface.model_setup import ( + DFTLatticeGas, +) +from abics.applications.latgas_abinitio_interface.defect import ( + defect_config, + DFTConfigParams, +) +from abics.applications.latgas_abinitio_interface.run_base_mpi import ( + Runner, + RunnerEnsemble, + RunnerMultistep, +) +from abics.applications.latgas_abinitio_interface.base_solver import ( + SolverBase, + create_solver, +) +from abics.applications.latgas_abinitio_interface.params import DFTParams + +from abics.util import exists_on_all_nodes + +import logging + +logger = logging.getLogger("main") + + +def postproc_dft_latgas(params_root: MutableMapping): + params_sampling = params_root.get("sampling", None) + if not params_sampling: + logger.error("sampling section is missing in parameters") + sys.exit(1) + dftparams = DFTParams.from_dict(params_sampling["solver"]) + sampler_type = params_sampling.get("sampler", "RXMC") + params_observer = params_root.get("observer", {}) + + if sampler_type == "RXMC": + rxparams = RXParams.from_dict(params_sampling) + nreplicas = rxparams.nreplicas + nprocs_per_replica = rxparams.nprocs_per_replica + + kB = constants.value("Boltzmann constant in eV/K") + + nensemble = len(dftparams.base_input_dir) + comm, commEnsemble, commAll = RX_MPI_init( + rxparams.nreplicas, rxparams.seed, nensemble + ) + + # RXMC parameters + # specify temperatures for each replica, number of steps, etc. + kTs = kB * rxparams.kTs + kTstart = rxparams.kTs[0] + kTend = rxparams.kTs[-1] + + # Set Lreload to True when restarting + Lreload = rxparams.reload + + nsteps = rxparams.nsteps + RXtrial_frequency = rxparams.RXtrial_frequency + sample_frequency = rxparams.sample_frequency + print_frequency = rxparams.print_frequency + + logger.info(f"-Running RXMC calculation with {nreplicas} replicas") + logger.info(f"--Temperature varies from {kTstart} K to {kTend} K") + + elif sampler_type == "PAMC": + pamcparams = PAMCParams.from_dict(params_sampling) + nreplicas = pamcparams.nreplicas + nprocs_per_replica = pamcparams.nprocs_per_replica + + kB = constants.value("Boltzmann constant in eV/K") + + nensemble = len(dftparams.base_input_dir) + comm, commEnsemble, commAll = RX_MPI_init( + pamcparams.nreplicas, pamcparams.seed, nensemble + ) + + # RXMC parameters + # specify temperatures for each replica, number of steps, etc. + kTstart = pamcparams.kTs[0] + kTend = pamcparams.kTs[-1] + if kTstart < kTend: + kTstart, kTend = kTend, kTstart + kTs = kB * pamcparams.kTs + + # Set Lreload to True when restarting + Lreload = pamcparams.reload + + nsteps = pamcparams.nsteps + resample_frequency = pamcparams.resample_frequency + sample_frequency = pamcparams.sample_frequency + print_frequency = pamcparams.print_frequency + + logger.info(f"-Running PAMC calculation with {nreplicas} replicas") + logger.info(f"--Anneal from {kTstart} K to {kTend} K") + + elif sampler_type == "parallelRand": + prparams = ParallelRandomParams.from_dict(params_sampling) + nreplicas = prparams.nreplicas + nprocs_per_replica = prparams.nprocs_per_replica + nensemble = len(dftparams.base_input_dir) + comm, commEnsemble, commAll = RX_MPI_init( + prparams.nreplicas, prparams.seed, nensemble + ) + + # Set Lreload to True when restarting + Lreload = prparams.reload + + nsteps = prparams.nsteps + sample_frequency = prparams.sample_frequency + print_frequency = prparams.print_frequency + logger.info(f"-Running parallel random sampling") + + elif sampler_type == "parallelMC": + rxparams = RXParams.from_dict(params_sampling) + nreplicas = rxparams.nreplicas + nprocs_per_replica = rxparams.nprocs_per_replica + + kB = constants.value("Boltzmann constant in eV/K") + + nensemble = len(dftparams.base_input_dir) + comm, commEnsemble, commAll = RX_MPI_init( + rxparams.nreplicas, rxparams.seed, nensemble + ) + + # RXMC parameters + # specify temperatures for each replica, number of steps, etc. + kTstart = rxparams.kTs[0] + kTend = rxparams.kTs[-1] + kTs = kB * rxparams.kTs + + # Set Lreload to True when restarting + Lreload = rxparams.reload + + nsteps = rxparams.nsteps + sample_frequency = rxparams.sample_frequency + print_frequency = rxparams.print_frequency + logger.info(f"-Running parallel MC sampling") + + else: + logger.error("Unknown sampler. Exiting...") + sys.exit(1) + + solvers = [] + for i in range(len(dftparams.base_input_dir)): + solver: SolverBase = create_solver(dftparams.solver, dftparams) + solvers.append(solver) + + logger.info(f"-Setting up {dftparams.solver} solver for configuration energies") + logger.info( + "--Base input is taken from {}".format(",".join(dftparams.base_input_dir)) + ) + + # model setup + # we first choose a "model" defining how to perform energy calculations and trial steps + # on the "configuration" defined below + energy_calculator: Union[Runner, RunnerEnsemble, RunnerMultistep] + if dftparams.ensemble: + if len(dftparams.base_input_dir) == 1: + logger.error( + "You must specify more than one base_input_dir for ensemble calculator" + ) + sys.exit(1) + energy_calculator = RunnerEnsemble( + base_input_dirs=dftparams.base_input_dir, + Solvers=solvers, + runner=Runner, + nprocs_per_solver=nprocs_per_replica, + comm=commEnsemble, + perturb=dftparams.perturb, + solver_run_scheme=dftparams.solver_run_scheme, + use_tmpdir=dftparams.use_tmpdir, + ) + else: + if len(dftparams.base_input_dir) == 1: + energy_calculator = Runner( + base_input_dir=dftparams.base_input_dir[0], + Solver=solvers[0], + nprocs_per_solver=nprocs_per_replica, + comm=MPI.COMM_SELF, + perturb=dftparams.perturb, + solver_run_scheme=dftparams.solver_run_scheme, + use_tmpdir=dftparams.use_tmpdir, + ) + else: + energy_calculator = RunnerMultistep( + base_input_dirs=dftparams.base_input_dir, + Solvers=solvers, + runner=Runner, + nprocs_per_solver=nprocs_per_replica, + comm=MPI.COMM_SELF, + perturb=dftparams.perturb, + solver_run_scheme=dftparams.solver_run_scheme, + use_tmpdir=dftparams.use_tmpdir, + ) + + gc_flag = params_sampling.get("enable_grandcanonical", False) + gc_ratio = params_sampling.get("gc_ratio", 0.3) + + model = DFTLatticeGas( + energy_calculator, + save_history=False, + enable_grandcanonical=gc_flag, + gc_ratio=gc_ratio, + ) + + logger.info("--Success.") + logger.info("-Setting up the on-lattice model.") + + configparams = DFTConfigParams.from_dict(params_root["config"]) + + spinel_config = defect_config(configparams) + if configparams.init_structure is None: + exit_code, msg = spinel_config.shuffle() + logger.info("--Rank {}: {}".format(commAll.Get_rank(), msg)) + exit_code = commAll.allgather(exit_code) + if np.sum(exit_code) != 0: + logger.error("Failed to initialize configuration. Exiting.") + sys.exit(1) + else: + logger.info("--Initial structure will be set to 'config.init_structure'.") + spinel_config.reset_from_structure(configparams.init_structure) + + # configs = [] + # for i in range(nreplicas): + # configs.append(copy.deepcopy(spinel_config)) + configs = [spinel_config] * nreplicas + + obsparams = ObserverParams.from_dict(params_observer) + + logger.info("--Success.") + + # NNP ensemble error estimation + if "ensemble" in params_root: + raise NotImplementedError("Ensemble error estimation is not implemented yet") + # ensembleparams = EnsembleParams.from_dict(params_root["ensemble"]) + # solver = create_solver(ensembleparams.solver, ensembleparams) + # + # energy_calculators = [ + # Runner( + # base_input_dir=base_input_dir, + # Solver=copy.deepcopy(solver), + # nprocs_per_solver=nprocs_per_replica, + # comm=MPI.COMM_SELF, + # perturb=ensembleparams.perturb, + # solver_run_scheme=ensembleparams.solver_run_scheme, + # use_tmpdir=dftparams.use_tmpdir, + # ) + # for base_input_dir in ensembleparams.base_input_dirs + # ] + # observer: DefaultObserver = EnsembleErrorObserver( + # commEnsemble, energy_calculators, Lreload + # ) + else: + observer = DefaultObserver(comm, Lreload, params_observer, with_energy=False) + + ALrun = exists_on_all_nodes(commAll, "ALloop.progress") + + # Active learning mode + if ALrun: + logger.info(f"-Running in active learning mode.") + + if "train0" in os.listdir(): + # Check how many AL iterations have been performed + i = 0 + while exists_on_all_nodes(commAll, "MC{}".format(i)): + i += 1 + with open("ALloop.progress", "r") as fi: + last_li = fi.readlines(-1)[-1] + if "train" not in last_li: + logger.error("You should train before next MC sampling.") + sys.exit(1) + if Lreload: + logger.info(f"--Restarting run in MC{i-1}") + rootdir = os.getcwd() + os.chdir("MC{}".format(i - 1)) + MCid = i - 1 + else: + # Make new directory and perform sampling there + if commAll.Get_rank() == 0: + logger.info(f"--MC sampling will be run in MC{i}") + os.mkdir("MC{}".format(i)) + if dftparams.use_tmpdir: + logger.info( + f"---Will use local tmpdir for {dftparams.solver} run" + ) + # backup baseinput for this AL step + for j, d in enumerate(dftparams.base_input_dir): + shutil.copytree(d, "MC{}/baseinput{}".format(i, j)) + sys.stdout.flush() + commAll.Barrier() + rootdir = os.getcwd() + while not exists_on_all_nodes(commAll, "MC{}".format(i)): + pass + os.chdir("MC{}".format(i)) + MCid = i + else: + logger.error("You should train before MC sampling in AL mode.") + sys.exit(1) + + if gc_flag == True: + mc_class = WeightedCanonicalMonteCarlo + else: + mc_class = CanonicalMonteCarlo + + if commEnsemble.Get_rank() == 0: + write_node = True + else: + write_node = False + + rankdir = str(comm.Get_rank()) + kT_hist = np.load(os.path.join(rankdir, "kT_hist.npy")) + obs_save_e = np.load(os.path.join(rankdir, "obs_save.npy"))[:, 0:2] + nsamples = obs_save_e.shape[0] + nobs = len(observer.names) + obs_save = np.zeros((nsamples, 2 + nobs)) + structure = Structure.from_file(os.path.join(rankdir, "structure.0.vasp")) + config = defect_config(configparams) + config.reset_from_structure(structure) + calc_state = mc_class(model, kT_hist[0], config) + for isamples, kT in enumerate(kT_hist): + structure = Structure.from_file( + os.path.join(rankdir, f"structure.{isamples}.vasp") + ) + config.reset_from_structure(structure) + calc_state.config = config + obs = observer.logfunc(calc_state) + obs_save[isamples, 0:2] = obs_save_e[isamples, :] + obs_save[isamples, 2:] = obs + obsnames = ["energy_internal", "energy"] + obsnames.extend(observer.names) + if sampler_type == "RXMC": + Trank_hist = np.load(os.path.join(rankdir, "Trank_hist.npy")) + throw_out = rxparams.throw_out + rxmc.postproc( + obs_save=obs_save, + Trank_hist=Trank_hist, + kT_hist=kT_hist, + kTs=kTs, + comm=comm, + obsnames=obsnames, + throw_out=throw_out, + ) + + elif sampler_type == "PAMC": + isamples_offsets = np.zeros(len(kTs) + 1, dtype=int) + kT_old = kT_hist[0] + ikT = 1 + for kT in kT_hist: + if kT != kT_old: + kT_old = kT + ikT += 1 + isamples_offsets[ikT] = isamples_offsets[ikT - 1] + isamples_offsets[ikT] += 1 + + logweight_history = np.load(os.path.join(rankdir, "logweight_hist.npy")) + dlogz = np.load(os.path.join(rankdir, "dlogz.npy")) + pamc.postproc( + kTs=kTs, + obs_save=obs_save, + dlogz=dlogz, + logweight_history=logweight_history, + obsnames=obsnames, + isamples_offsets=isamples_offsets, + comm=comm, + ) + + elif sampler_type == "parallelRand": + throw_out = prparams.throw_out + simple_parallel.postproc( + obs_save=obs_save, + kTs=prparams.kT, + comm=comm, + obsnames=obsnames, + throw_out=throw_out, + ) + + elif sampler_type == "parallelMC": + throw_out = rxparams.throw_out + simple_parallel.postproc( + obs_save=obs_save, + kTs=kTs, + comm=comm, + obsnames=obsnames, + throw_out=throw_out, + ) + logger.info("--Sampling completed sucessfully.") + logger.info("Exiting normally on {}\n".format(datetime.datetime.now())) diff --git a/docs/sphinx/en/source/how_to_use/index.rst b/docs/sphinx/en/source/how_to_use/index.rst index f6c00a3d..130f9df1 100644 --- a/docs/sphinx/en/source/how_to_use/index.rst +++ b/docs/sphinx/en/source/how_to_use/index.rst @@ -190,3 +190,10 @@ abICS can call the ``aenet`` library via the LAMMPS interface (``aenetPyLammps`` This is faster than calling ``aenet`` directly because it does not need file I/O. To use ``aenetPyLammps``, you need to install ``aenet-lammps`` and ``lammps``. For details, please refer to the :ref:`tutorial_aenet_lammps`. + + +Post-processing +------------------ + +``abics_sampling`` outputs the expectation values of physical quantities for each temperature. +To calculate other physical quantities using the configurations sampled by ``abics_sampling`` (without resampling), use ``abics_postproc``. diff --git a/docs/sphinx/ja/source/how_to_use/index.rst b/docs/sphinx/ja/source/how_to_use/index.rst index 6428c216..4596e29b 100644 --- a/docs/sphinx/ja/source/how_to_use/index.rst +++ b/docs/sphinx/ja/source/how_to_use/index.rst @@ -50,7 +50,7 @@ abICSの入力ファイルは, 以下の5つのセクションから構成され 4. [observer] セクション - 取得する物理量の種類などを指定します. + 計算する物理量を指定します. 5. [config] セクション @@ -191,3 +191,8 @@ aenet ``aenetPyLammps`` の利用には、 `aenet-lammps `_ および `LAMMPS `_ のインストールが必要です。 インストールや使い方の詳細は :ref:`tutorial_aenet_lammps` を参照してください。 +期待値計算 +------------- + +``abics_sampling`` は最後に物理量の期待値を温度ごとに計算・出力します。 +また、 ``abics_sampling`` でサンプリングした結果を用いて(新たにサンプリングせずに)別の物理量を計算するためには、 ``abics_postproc`` を利用できます。 diff --git a/pyproject.toml b/pyproject.toml index 62ca8462..444f797c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ st2abics = "abics.scripts.st2abics_config:main" abicsRXsepT = "abics.scripts.abicsRXsepT:main" abics_mlref = "abics.scripts.activelearn:main" abics_train = "abics.scripts.train:main" +abics_postproc = "abics.scripts.postproc:main" [tool.mypy] files = "abics" From f6503e96de6b0cfd1c4824692856150552e4f7ec Mon Sep 17 00:00:00 2001 From: Yuichi Motoyama Date: Tue, 3 Oct 2023 12:25:38 +0900 Subject: [PATCH 2/3] fixed jackknife method --- abics/sampling/rxmc.py | 4 ++-- abics/sampling/simple_parallel.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/abics/sampling/rxmc.py b/abics/sampling/rxmc.py index 259e5a00..2d10a2df 100644 --- a/abics/sampling/rxmc.py +++ b/abics/sampling/rxmc.py @@ -472,7 +472,7 @@ def postproc(obs_save, Trank_hist, kT_hist, kTs, comm, X2_jk = jackknife(X2) F = X2.mean(axis=0) - X.mean(axis=0) ** 2 # F stands for Fluctuation F_jk = X2_jk - X_jk**2 - F_mean = F_jk.sum(axis=0) - F_jk.mean(axis=0) * (nsamples - 1) + F_mean = F - F_jk.mean(axis=0) * (nsamples - 1) F_err = np.sqrt((nsamples - 1) * F_jk.var(axis=0, ddof=0)) # estimate free energy @@ -490,7 +490,7 @@ def postproc(obs_save, Trank_hist, kT_hist, kTs, comm, Exp = np.exp(-dbeta * (energies - emin)) Exp_jk = jackknife(Exp) Logz_jk = np.log(Exp_jk) - Logz_mean = Logz_jk.sum() - Logz_jk.mean() * (nsamples - 1) - dbeta * emin + Logz_mean = np.log(Exp.mean()) - Logz_jk.mean() * (nsamples - 1) - dbeta * emin Logz_err = np.sqrt((nsamples - 1) * Logz_jk.var(ddof=0)) else: Logz_mean = 0.0 diff --git a/abics/sampling/simple_parallel.py b/abics/sampling/simple_parallel.py index 622381e9..024a5637 100644 --- a/abics/sampling/simple_parallel.py +++ b/abics/sampling/simple_parallel.py @@ -437,7 +437,7 @@ def postproc(obs_save, kTs, comm, X2_jk = jackknife(X2) F = X2.mean(axis=0) - X.mean(axis=0) ** 2 # F stands for Fluctuation F_jk = X2_jk - X_jk**2 - F_mean = F_jk.sum(axis=0) - F_jk.mean(axis=0) * (nsamples - 1) + F_mean = F - F_jk.mean(axis=0) * (nsamples - 1) F_err = np.sqrt((nsamples - 1) * F_jk.var(axis=0, ddof=0)) obs = np.array([X_mean, X_err, X2_mean, X2_err, F_mean, F_err]) From d9e55e6b8215e05a296d5b925954d6a0c7929b00 Mon Sep 17 00:00:00 2001 From: Yuichi Motoyama Date: Tue, 3 Oct 2023 17:35:42 +0900 Subject: [PATCH 3/3] fix --- abics/sampling/rxmc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abics/sampling/rxmc.py b/abics/sampling/rxmc.py index 2d10a2df..72208f03 100644 --- a/abics/sampling/rxmc.py +++ b/abics/sampling/rxmc.py @@ -472,7 +472,7 @@ def postproc(obs_save, Trank_hist, kT_hist, kTs, comm, X2_jk = jackknife(X2) F = X2.mean(axis=0) - X.mean(axis=0) ** 2 # F stands for Fluctuation F_jk = X2_jk - X_jk**2 - F_mean = F - F_jk.mean(axis=0) * (nsamples - 1) + F_mean = nsamples * F - F_jk.mean(axis=0) * (nsamples - 1) F_err = np.sqrt((nsamples - 1) * F_jk.var(axis=0, ddof=0)) # estimate free energy @@ -490,7 +490,7 @@ def postproc(obs_save, Trank_hist, kT_hist, kTs, comm, Exp = np.exp(-dbeta * (energies - emin)) Exp_jk = jackknife(Exp) Logz_jk = np.log(Exp_jk) - Logz_mean = np.log(Exp.mean()) - Logz_jk.mean() * (nsamples - 1) - dbeta * emin + Logz_mean = nsamples * np.log(Exp.mean()) - Logz_jk.mean() * (nsamples - 1) - dbeta * emin Logz_err = np.sqrt((nsamples - 1) * Logz_jk.var(ddof=0)) else: Logz_mean = 0.0