From a8a111ad2f8b2467ec7e2722a93e8d2ae937914e Mon Sep 17 00:00:00 2001
From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com>
Date: Fri, 6 Sep 2024 14:20:33 -0400
Subject: [PATCH 01/14] remove w_fluxanl deprecation warning (#443)

---
 src/westpa/cli/tools/w_fluxanl.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/westpa/cli/tools/w_fluxanl.py b/src/westpa/cli/tools/w_fluxanl.py
index 9cc47b14a..a393e80f9 100644
--- a/src/westpa/cli/tools/w_fluxanl.py
+++ b/src/westpa/cli/tools/w_fluxanl.py
@@ -1,7 +1,6 @@
 import h5py
 import numpy as np
 from scipy.signal import fftconvolve
-from warnings import warn

 import westpa

@@ -370,7 +369,6 @@ def go(self):


 def entry_point():
-    warn('w_fluxanl is being deprecated. Please use w_assign and w_direct instead.')
     WFluxanlTool().main()


From 2e8112896cd78d3a2ff55a10aa7ff49189c9769d Mon Sep 17 00:00:00 2001
From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:07:38 -0400
Subject: [PATCH 02/14] swap to miniforge from mambaforge (#444)

---
 .github/workflows/test.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 799f65076..f80221a9a 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -53,7 +53,6 @@ jobs:
       - uses: conda-incubator/setup-miniconda@v3
         with:
           python-version: ${{ matrix.python-version }}
-          miniforge-variant: Mambaforge
           environment-file: devtools/conda-envs/test_env.yaml
           activate-environment: test_env
           channel-priority: true

From b22a8e85ede3017d80c2cc775d4feb741501df57 Mon Sep 17 00:00:00 2001
From: Hayden Scheiber
Date: Thu, 12 Sep 2024 14:08:10 -0700
Subject: [PATCH 03/14] Fixing the MAB skip function in 2+ dimensions. (#436)

* Fixed bug in MAB skip function and refactored MAB code. Updated docs for MAB.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixed linting and added some basic MAB tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixed linting

* further refactored code and wrote rigorous tests in higher dimensions

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update mab.py

fixed some undefined variables used in reporting

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update mab.py

* Update mab.py

* Small fixes and added MAB test ref data.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fixed n_bottleneck_filled tracker and updated MAB log bin boundaries.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Found and fixed another bug. Conceptually simplified special MAB bin assignment code and refactored MAB tests.
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed unused import --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/westpa/core/binning/mab.py | 618 +++++++++++++++++------------- tests/refs/mab_assignments_ref.h5 | Bin 0 -> 159776 bytes tests/test_binning.py | 320 ++++++++++++++++ 3 files changed, 664 insertions(+), 274 deletions(-) create mode 100644 tests/refs/mab_assignments_ref.h5 diff --git a/src/westpa/core/binning/mab.py b/src/westpa/core/binning/mab.py index 42caccf6b..56a78cb60 100644 --- a/src/westpa/core/binning/mab.py +++ b/src/westpa/core/binning/mab.py @@ -1,73 +1,67 @@ import logging +from typing import List, Optional import numpy as np import westpa from westpa.core.binning import FuncBinMapper from os.path import expandvars - log = logging.getLogger(__name__) class MABBinMapper(FuncBinMapper): """ - Adaptively place bins in between minimum and maximum segments along - the progress coordinte. Extrema and bottleneck segments are assigned + Adaptively place bins between minimum and maximum segments along + the progress coordinate. Extrema and bottleneck segments are assigned to their own bins. - """ def __init__( self, - nbins, - direction=None, - skip=None, - bottleneck=True, - pca=False, - mab_log=False, - bin_log=False, - bin_log_path="$WEST_SIM_ROOT/binbounds.log", + nbins: List[int], + direction: Optional[List[int]] = None, + skip: Optional[List[int]] = None, + bottleneck: bool = True, + pca: bool = False, + mab_log: bool = False, + bin_log: bool = False, + bin_log_path: str = "$WEST_SIM_ROOT/binbounds.log", ): """ Parameters ---------- nbins : list of int - List of int for nbins in each dimension. - direction : Union(list of int, None), default: None - List of int for 'direction' in each dimension. - Direction options are as follows: + List of number of bins in each dimension. + direction : Optional[list of int], default: None + List of directions in each dimension. Direction options: 0 : default split at leading and lagging boundaries 1 : split at leading boundary only -1 : split at lagging boundary only - 86 : no splitting at either leading or lagging boundary - skip : Union(list of int, None), default: None - List of int for each dimension. Default None for skip=0. - Set to 1 to 'skip' running mab in a dimension. + 86 : no splitting at either leading or lagging boundary (both bottlenecks included) + skip : Optional[list of int], default: None + List of skip flags for each dimension. Default None (no skipping). bottleneck : bool, default: True - Whether to turn on or off bottleneck walker splitting. + Whether to enable bottleneck walker splitting. pca : bool, default: False - Can be True or False (default) to run PCA on pcoords before bin assignment. + Whether to perform PCA on progress coordinates before bin assignment. mab_log : bool, default: False - Whether to output mab info to west.log. + Whether to output MAB info to west.log. bin_log : bool, default: False - Whether to output mab bin boundaries to bin_log_path file. + Whether to output MAB bin boundaries to a log file. bin_log_path : str, default: "$WEST_SIM_ROOT/binbounds.log" Path to output bin boundaries. 
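+
+        A minimal sketch of typical use (bin counts follow
+        ``determine_total_bins`` below)::
+
+            mapper = MABBinMapper([5])  # 5 linear + 2 boundary + 2 bottleneck bins
+            assert mapper.nbins == 9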
- """ # Verifying parameters if nbins is None: - raise ValueError("nbins_per_dim is missing") + raise ValueError("nbins is missing") ndim = len(nbins) - if direction is None: - direction = [0] * ndim - elif len(direction) != ndim: + direction = direction or [0] * ndim + if len(direction) != ndim: direction = [0] * ndim log.warning("Direction list is not the correct dimensions, setting to defaults.") - if skip is None: - skip = [0] * ndim - elif len(skip) != ndim: + skip = skip or [0] * ndim + if len(skip) != ndim: skip = [0] * ndim log.warning("Skip list is not the correct dimensions, setting to defaults.") @@ -86,9 +80,12 @@ def __init__( super().__init__(map_mab, n_total_bins, kwargs=kwargs) - def determine_total_bins(self, nbins_per_dim, direction, skip, bottleneck, **kwargs): + def determine_total_bins( + self, nbins_per_dim: List[int], direction: List[int], skip: List[int], bottleneck: bool, **kwargs + ) -> int: """ - The following is neccessary because functional bin mappers need to "reserve" + Calculate the total number of bins needed, taking direction and skipping into account. + This function is necessary because functional bin mappers need to "reserve" bins and tell the sim manager how many bins they will need to use, this is determined by taking all direction/skipping info into account. @@ -97,65 +94,66 @@ def determine_total_bins(self, nbins_per_dim, direction, skip, bottleneck, **kwa nbins_per_dim : list of int Number of total bins in each dimension within the linear portion. direction : list of int - Direction in each dimension. See __init__ for more information. + Direction in each dimension. skip : list of int - List of 0s and 1s indicating whether to skip each dimension. + List indicating whether to skip each dimension. bottleneck : bool - Whether to include separate bin for bottleneck walker(s). + Whether to include a separate bin for bottleneck walker(s). **kwargs : dict - Arbitary keyword arguments. Contains unneeded MAB parameters. + Additional MAB parameters (unused). Returns ------- n_total_bins : int Number of total bins. 
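+
+        For example (worked from the rules above), ``nbins_per_dim=[5, 5]``
+        with ``direction=[0, 0]``, ``skip=[0, 0]``, and ``bottleneck=True``
+        gives 25 linear bins plus 2 boundary and 2 bottleneck bins per
+        dimension, for 33 bins in total.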
-
        """
-        n_total_bins = np.prod(nbins_per_dim)  # Number of Bins in the linear portion
-        ndim = len(nbins_per_dim)
-        for i in range(ndim):
-            if skip[i] == 0:
-                if direction[i] == 0:
-                    # Both directions (leading/trailing) + bottlenecks if enabled
+        # Update nbins_per_dim with any skipped dimensions, setting number of bins along skipped dimensions to 1
+        skip = np.array([bool(s) for s in skip])
+        nbins_per_dim = np.array(nbins_per_dim)
+        nbins_per_dim[skip] = 1
+
+        # Total bins is the product of all linear bins plus any special bins
+        n_total_bins = nbins_per_dim.prod()
+        for direct, skip_dim in zip(direction, skip):
+            if not skip_dim:
+                if direct in [-1, 1]:
+                    # 1 lead or lag bin + 1 bottleneck bin
+                    n_total_bins += 1 + 1 * bottleneck
+                elif direct == 0:
+                    # 2 lead/lag bins + 2 bottleneck bins
                     n_total_bins += 2 + 2 * bottleneck
-                elif direction[i] == 86:
-                    # No leading/trailing, 2 bottlenecks if enabled
+                elif direct == 86:
+                    # 0 lead/lag + 2 bottleneck bins
                     n_total_bins += 2 * bottleneck
-                else:
-                    # direction[i] == -1 or 1, Just one leading/trailing + 1 bottleneck
-                    n_total_bins += 1 + 1 * bottleneck
-            else:
-                n_total_bins -= nbins_per_dim[i] - 1
-        n_total_bins += 1 * ndim  # or else it will be one bin short
         return n_total_bins


-def map_mab(coords, mask, output, *args, **kwargs):
+def map_mab(coords: np.ndarray, mask: np.ndarray, output: List[int], *args, **kwargs) -> List[int]:
     """
-    Binning which adaptively places bins based on the positions of extrema segments and
-    bottleneck segments, which are where the difference in probability is the greatest
-    along the progress coordinate. Operates per dimension and places a fixed number of
+    Adaptively place bins based on extrema and bottleneck segments along the progress coordinate.
+
+    Bottleneck segments are where the difference in probability is the greatest
+    along the progress coordinate. Operates per dimension (unless skipped) and places a fixed number of
     evenly spaced bins between the segments with the min and max pcoord values. Extrema
     and bottleneck segments are assigned their own bins.

     Parameters
     ----------
-    coords : ndarray
+    coords : np.ndarray
         An array with pcoord and weight info.
-    mask : ndarray
-        Array of 1 (True) and 0 (False), to filter out unwanted segment info.
+    mask : np.ndarray
+        Boolean array to filter out unwanted segments.
     output : list
         The main list that, for each segment, holds the bin assignment.
     *args : list
-        Variable length arguments.
+        Additional arguments.
     **kwargs : dict
-        Arbitary keyword arguments. Contains most of the MAB-needed parameters.
+        Additional keyword arguments. Contains most of the MAB-needed parameters.

     Returns
     ------
     output : list
-        The main list that, for each segment, holds the bin assignment.
-
+        List with bin assignments for each segment.
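+
+    A call mirroring the tests in ``tests/test_binning.py`` (argument
+    values illustrative) looks like::
+
+        output = map_mab(coords=allcoords, mask=mask, output=output,
+                         nbins_per_dim=[2, 2], direction=[0, 0],
+                         bottleneck=True, skip=[0, 0])
+
+    where each row of ``coords`` holds the pcoord values, then the segment
+    weight, then a flag marking the segment's final point.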
""" # Argument Processing @@ -163,8 +161,8 @@ def map_mab(coords, mask, output, *args, **kwargs): ndim = len(nbins_per_dim) pca = kwargs.get("pca", False) bottleneck = kwargs.get("bottleneck", True) - direction = kwargs.get("direction", ([0] * ndim)) - skip = kwargs.get("skip", ([0] * ndim)) + direction = kwargs.get("direction", [0] * ndim) + skip = kwargs.get("skip", [0] * ndim) mab_log = kwargs.get("mab_log", False) bin_log = kwargs.get("bin_log", False) bin_log_path = kwargs.get("bin_log_path", "$WEST_SIM_ROOT/binbounds.log") @@ -190,11 +188,11 @@ def map_mab(coords, mask, output, *args, **kwargs): if coords[0, -1] == 0: report = True if coords.shape[1] > ndim + 1: - isfinal = allcoords[:, ndim + 1].astype(np.bool_) + isfinal = allcoords[:, ndim + 1].astype(bool) else: - isfinal = np.ones(coords.shape[0], dtype=np.bool_) + isfinal = np.ones(coords.shape[0], dtype=bool) coords = coords[isfinal, :ndim] - weights = allcoords[isfinal, ndim + 0] + weights = allcoords[isfinal, ndim] mask = mask[isfinal] splitting = True @@ -204,229 +202,301 @@ def map_mab(coords, mask, output, *args, **kwargs): weights = None splitting = False - varcoords = np.copy(coords) originalcoords = np.copy(coords) if pca and len(output) > 1: - colavg = np.mean(coords, axis=0) - for i in range(len(coords)): - for j in range(len(coords[i])): - varcoords[i][j] = coords[i][j] - colavg[j] - covcoords = np.cov(np.transpose(varcoords), aweights=weights) - eigval, eigvec = np.linalg.eigh(covcoords) - eigvec = eigvec[:, np.argmax(np.absolute(eigvec), axis=1)] - for i in range(len(eigvec)): - if eigvec[i, i] < 0: - eigvec[:, i] = -1 * eigvec[:, i] - for i in range(ndim): - for j in range(len(output)): - coords[j][i] = np.dot(varcoords[j], eigvec[:, i]) - - maxlist = [] - minlist = [] - difflist = [] - flipdifflist = [] - for n in range(ndim): - # identify the boundary segments - maxcoord = np.max(coords[mask, n]) - mincoord = np.min(coords[mask, n]) - maxlist.append(maxcoord) - minlist.append(mincoord) - - # detect the bottleneck segments, this uses the weights - if splitting: - temp = np.column_stack((originalcoords[mask, n], weights[mask])) - sorted_indices = temp[:, 0].argsort() - temp = temp[sorted_indices] - for p in range(len(temp)): - if temp[p][1] == 0: - temp[p][1] = 10**-323 - fliptemp = np.flipud(temp) - - difflist.append(None) - flipdifflist.append(None) - maxdiff = 0 - flipmaxdiff = 0 - for i in range(1, len(temp) - 1): - comprob = 0 - flipcomprob = 0 - j = i + 1 - while j < len(temp): - comprob = comprob + temp[j][1] - flipcomprob = flipcomprob + fliptemp[j][1] - j = j + 1 - diff = -np.log(comprob) + np.log(temp[i][1]) - if diff > maxdiff: - difflist[n] = temp[i][0] - maxdiff = diff - flipdiff = -np.log(flipcomprob) + np.log(fliptemp[i][1]) - if flipdiff > flipmaxdiff: - flipdifflist[n] = fliptemp[i][0] - flipmaxdiff = flipdiff + coords = apply_pca(coords, weights) + + # Computing special bins (bottleneck and boundary bins) + minlist, maxlist, bottlenecks_forward, bottlenecks_reverse = calculate_bin_boundaries( + originalcoords, weights, mask, skip, splitting, bottleneck + ) if mab_log and report: - westpa.rc.pstatus("################ MAB stats ################") - westpa.rc.pstatus("minima in each dimension: {}".format(minlist)) - westpa.rc.pstatus("maxima in each dimension: {}".format(maxlist)) - westpa.rc.pstatus("direction in each dimension: {}".format(direction)) - westpa.rc.pstatus("skip in each dimension: {}".format(skip)) - westpa.rc.pstatus("###########################################") - 
westpa.rc.pflush() - - # assign segments to bins - # the total number of linear bins is the boundary base - boundary_base = np.prod(nbins_per_dim) - - # the bottleneck base is offset by the number of boundary walkers, - # which is two per dimension unless there is a direction specified - # in a particluar dimension, then it's just one - bottleneck_base = boundary_base + log_mab_stats(minlist, maxlist, direction, skip) + + # Assign segments to bins + n_bottleneck_filled = bin_assignment( + allcoords, + allmask, + minlist, + maxlist, + bottlenecks_forward, + bottlenecks_reverse, + nbins_per_dim, + direction, + skip, + splitting, + bottleneck, + output, + ) + + # Report MAB bin statistics + if bin_log and report and westpa.rc.sim_manager.n_iter: + log_bin_boundaries( + skip, + bottleneck, + direction, + bin_log_path, + minlist, + maxlist, + nbins_per_dim, + n_bottleneck_filled, + bottlenecks_forward, + bottlenecks_reverse, + ) + + return output + + +def apply_pca(coords, weights): + colavg = np.mean(coords, axis=0) + varcoords = coords - colavg + covcoords = np.cov(varcoords.T, aweights=weights) + eigval, eigvec = np.linalg.eigh(covcoords) + eigvec = eigvec[:, np.argmax(np.abs(eigvec), axis=1)] + eigvec[:, np.diag(eigvec) < 0] *= -1 + return np.dot(varcoords, eigvec) + + +def calculate_bin_boundaries(coords, weights, mask, skip, splitting, bottleneck): + """ + This function calculates minima, maxima, and bottleneck segments. + """ + skip = np.array([bool(s) for s in skip]) + + # Initialize lists to hold minima and maxima along each dimension + minlist, maxlist = [], [] + # Initialize lists to hold bottleneck segments along each dimension + bottlenecks_forward, bottlenecks_reverse = [None] * len(coords[0]), [None] * len(coords[0]) + # number of unmasked coords + n_coords = mask.sum() + # Grabbing all unmasked coords and weights + unmasked_coords = coords[mask, :] + unmasked_weights = weights[mask] if weights is not None else None + # Replace any zero weights with non-zero values so that log(weight) is well-defined + if unmasked_weights is not None: + unmasked_weights[unmasked_weights == 0] = 10**-323 + # Looping over each dimension of progress coordinate, even those being skipped + for n in range(len(coords[0])): + # We calculate the min and max pcoord along each dimension (boundary segments) even if skipping + maxlist.append(np.max(coords[mask, n])) + minlist.append(np.min(coords[mask, n])) + # Now we calculate the bottleneck segments + if splitting and bottleneck and not skip[n]: + bottlenecks_forward[n], bottlenecks_reverse[n] = detect_bottlenecks(unmasked_coords, unmasked_weights, n_coords, n) + + return minlist, maxlist, bottlenecks_forward, bottlenecks_reverse + + +def detect_bottlenecks(unmasked_coords, unmasked_weights, n_coords, n): + """ + Detect the bottleneck segments along the given coordinate n, this uses the weights + """ + # Grabbing all unmasked coords in current dimension, plus corresponding weights + # Sort by current dimension in coord, smallest to largest + sorted_indices = unmasked_coords[:, n].argsort() + # Grab sorted coords and weights + coords_srt = unmasked_coords[sorted_indices, :] + weights_srt = unmasked_weights[sorted_indices] + # Also sort in reverse order for opposite direction + coords_srt_flip = np.flipud(coords_srt) + weights_srt_flip = np.flipud(weights_srt) + # Initialize the max directional differences along current dimension as None (these may not be updated) + bottleneck_coords, bottleneck_coords_flip = None, None + maxdiff, maxdiff_flip = -np.inf, 
-np.inf
+    # Looping through all non-boundary coords
+    # Compute the cumulative weight on either side of each non-boundary walker
+    for i in range(1, n_coords - 1):
+        # Summing up weights of all walkers ahead of current walker along current dim in both directions
+        cumulative_prob = np.sum(weights_srt[i + 1 :])
+        cumulative_prob_flip = np.sum(weights_srt_flip[i + 1 :])
+        # Compute the difference of log cumulative weight of current walker and all walkers ahead of it (Z in the MAB paper)
+        # We use the log as weights vary over many orders of magnitude
+        # Note a negative Z indicates the cumulative weight ahead of the current walker is larger than the weight of the current walker,
+        # while a positive Z indicates the cumulative weight ahead of the current walker is smaller, indicating a barrier
+        Z = np.log(weights_srt[i]) - np.log(cumulative_prob)
+        Z_flip = np.log(weights_srt_flip[i]) - np.log(cumulative_prob_flip)
+        # Update bottlenecks_forward with ALL coords of the current walker if its Z is the largest so far
+        # This way we uniquely identify a walker by its full set of coordinates
+        if Z > maxdiff:
+            bottleneck_coords = coords_srt[i, :]
+            maxdiff = Z
+        if Z_flip > maxdiff_flip:
+            bottleneck_coords_flip = coords_srt_flip[i, :]
+            maxdiff_flip = Z_flip
+    return bottleneck_coords, bottleneck_coords_flip
+
+
+def log_mab_stats(minlist, maxlist, direction, skip):
+    westpa.rc.pstatus("################ MAB stats ################")
+    westpa.rc.pstatus(f"minima in each dimension: {minlist}")
+    westpa.rc.pstatus(f"maxima in each dimension: {maxlist}")
+    westpa.rc.pstatus(f"direction in each dimension: {direction}")
+    westpa.rc.pstatus(f"skip in each dimension: {skip}")
+    westpa.rc.pstatus("###########################################")
+    westpa.rc.pflush()
+
+
+def bin_assignment(
+    coords,
+    mask,
+    minlist,
+    maxlist,
+    bottlenecks_forward,
+    bottlenecks_reverse,
+    nbins_per_dim,
+    direction,
+    skip,
+    splitting,
+    bottleneck,
+    output,
+):
+    """
+    Assign segments to bins based on the minima, maxima, and
+    bottleneck segments along the progress coordinate.
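+
+    Bin IDs are laid out in blocks: all linear bins first, then the
+    leading/lagging boundary bins, then the forward/reverse bottleneck
+    bins, matching the ID offsets computed below.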
+ """ + # Update nbins_per_dim with any skipped dimensions, setting number of bins along skipped dimensions to 1 + skip = np.array([bool(s) for s in skip]) + nbins_per_dim = np.array(nbins_per_dim) + nbins_per_dim[skip] = 1 + direction = np.array(direction) + + ndim = len(nbins_per_dim) n_bottleneck_filled = 0 - for i in range(0, ndim): - # for single direction, 1 boundary walker - if direction[i] == 1 or direction[i] == -1: - bottleneck_base += 1 - # 2 boundary walkers with 0 direction - elif direction[i] == 0: - bottleneck_base += 2 - # for 86 direction, no boundary walkers so offset of 0 - elif direction[i] == 86: - bottleneck_base += 0 - - # if a dimension is being "skipped", leave only one bin total as - # the offset - for i in range(0, ndim): - if skip[i] != 0: - boundary_base -= nbins_per_dim[i] - 1 + # Boolean arrays that track use of special bins along each dimension + skip_bneck_fwd = np.array([d == -1 if bottleneck else True for d in direction]) + skip + skip_bneck_rev = np.array([d == 1 if bottleneck else True for d in direction]) + skip + skip_lead = np.array([d in [86, -1] for d in direction]) + skip + skip_lag = np.array([d in [86, 1] for d in direction]) + skip - for i in range(len(output)): - if not allmask[i]: - continue + # List of dimensions that are not skipped + active_dims = np.array([n for n in range(ndim) if not skip[n]]) - # special means either a boundary or bottleneck walker (not a walker in the linear space) - special = False - # this holder is the bin number, which only needs to be unique for different walker groups - holder = 0 - if splitting: - for n in range(ndim): - coord = allcoords[i][n] + # Compute the boundary bin ID offsets + # In forward direction, this is all the linear bins + boundary_bin_id_offset_fwd = nbins_per_dim.prod() + # In reverse, we add the number of forward boundary bins to the offset + boundary_bin_id_offset_rev = boundary_bin_id_offset_fwd + (~skip_lead).sum() - # if skipped, just assign the walkers to the same bin (offset of boundary base) - if skip[n] != 0: - holder = boundary_base + n + # Compute the bottleneck bin ID offsets + # In forward direction, bin IDs are offset by all linear and boundary bins + bneck_bin_id_offset_fwd = boundary_bin_id_offset_rev + (~skip_lag).sum() + # In reverse, we add the number of forward bottleneck bins to the offset + bneck_bin_id_offset_rev = bneck_bin_id_offset_fwd + (~skip_bneck_fwd).sum() + + # Bin assignment loop over all walkers + for i in range(len(output)): + # Skip masked walkers, these walkers bin IDs are unchanged + if not mask[i]: + continue + # Initialize bin ID and special tracker for current coord + # The special variable indicates a boundary or bottleneck walker (not assigned to the linear space) + bin_id, special = 0, False + + # Searching for bottleneck bins first + if splitting and bottleneck: + for n in active_dims: + # Grab coord(s) of current walker + coord = coords[i][:ndim] + # Assign bottlenecks, taking directionality into account + # Check both directions when using 0 or 86 + # Note: 86 implies no leading or lagging bins, but does add bottlenecks for *both* directions when bottleneck is enabled + # Note: All bottleneck bins will typically be filled unless a walker is simultaneously in bottleneck bins along multiple dimensions + # or there are too few walkers to compute free energy barriers + if (coord == bottlenecks_forward[n]).all() and not skip_bneck_fwd[n]: + bin_id = bneck_bin_id_offset_fwd + n - skip_bneck_fwd[:n].sum() + special = True + n_bottleneck_filled += 1 + 
break + elif (coord == bottlenecks_reverse[n]).all() and not skip_bneck_rev[n]: + bin_id = bneck_bin_id_offset_rev + n - skip_bneck_rev[:n].sum() + special = True + n_bottleneck_filled += 1 break - # assign bottlenecks, taking directionality into account - if bottleneck: - if direction[n] == -1: - if coord == flipdifflist[n]: - holder = bottleneck_base + n - special = True - n_bottleneck_filled += 1 - break - - if direction[n] == 1: - if coord == difflist[n]: - holder = bottleneck_base + n - special = True - n_bottleneck_filled += 1 - break - - # both directions when using 0 or with - # special value of 86 for no lead/lag split - if direction[n] == 0 or direction[n] == 86: - if coord == difflist[n]: - holder = bottleneck_base + n - special = True - n_bottleneck_filled += 1 - break - elif coord == flipdifflist[n]: - holder = bottleneck_base + n + 1 - special = True - n_bottleneck_filled += 1 - break - - # assign boundary walkers, taking directionality into account - if direction[n] == -1: - if coord == minlist[n]: - holder = boundary_base + n - special = True - break - - elif direction[n] == 1: - if coord == maxlist[n]: - holder = boundary_base + n - special = True - break - - elif direction[n] == 0: - if coord == minlist[n]: - holder = boundary_base + n - special = True - break - elif coord == maxlist[n]: - holder = boundary_base + n + 1 - special = True - break - - # special value for direction with no lead/lag split - elif direction[n] == 86: - # westpa.rc.pstatus(f"No lead/lag split for dim {n}") - # westpa.rc.pflush() - # nornmally adds to special bin but here just leaving it forever empty - # holder = boundary_base + n + # Now check for boundary walkers, taking directionality into account + # This should only be done after fully checking for bottleneck walkers + if splitting and not special: + for n in active_dims: + # Grab coord of current walker along current dimension + coord = coords[i, n] + if (coord == maxlist[n]) and not skip_lead[n]: + bin_id = boundary_bin_id_offset_fwd + n - skip_lead[:n].sum() + special = True + break + elif (coord == minlist[n]) and not skip_lag[n]: + bin_id = boundary_bin_id_offset_rev + n - skip_lag[:n].sum() + special = True break - # the following are for the "linear" portion + # Now check for linear bin walkers if not special: + # Again we loop over the dimensions + # Note: no need to worry about skipping as we've already set all skipped dimensions to 1 bin for n in range(ndim): - # if skipped, it's added to the same bin as the special walkers above - if skip[n] != 0: - holder = boundary_base + n - break - - coord = allcoords[i][n] + coord = coords[i][n] nbins = nbins_per_dim[n] minp = minlist[n] maxp = maxlist[n] + # Generate the bins along this dimension bins = np.linspace(minp, maxp, nbins + 1) - bin_number = np.digitize(coord, bins) - 1 - - if isfinal is None or not isfinal[i]: - if bin_number >= nbins: - bin_number = nbins - 1 - elif bin_number < 0: - bin_number = 0 - elif bin_number >= nbins or bin_number < 0: - if np.isclose(bins[-1], coord): - bin_number = nbins - 1 - elif np.isclose(bins[0], coord): - bin_number = 0 - else: - raise ValueError("Walker out of boundary") - - holder += bin_number * np.prod(nbins_per_dim[:n]) - - # output is the main list that, for each segment, holds the bin assignment - output[i] = holder - - if bin_log and report: - if westpa.rc.sim_manager.n_iter: - with open(expandvars(bin_log_path), 'a') as bb_file: - # Iteration Number - bb_file.write(f'iteration: {westpa.rc.sim_manager.n_iter}\n') - bb_file.write('bin 
boundaries: ') - for n in range(ndim): - # Write binbounds per dim - bb_file.write(f'{np.linspace(minlist[n], maxlist[n], nbins_per_dim[n] + 1)}\t') - # Min/Max pcoord - bb_file.write(f'\nmin/max pcoord: {minlist} {maxlist}\n') - bb_file.write(f'bottleneck bins: {n_bottleneck_filled}\n') - if n_bottleneck_filled > 0: - # Bottlenecks bins exist (passes any of the if bottleneck: checks) - bb_file.write(f'bottleneck pcoord: {flipdifflist} {difflist}\n\n') - else: - bb_file.write('\n') - return output + # Assign walker to a bin along this dimension + bin_number = np.digitize(coord, bins) - 1 # note np.digitize is 1-indexed + + # Sometimes the walker is exactly at the max/min value, + # which would put it in the next bin + if bin_number == nbins: + bin_number -= 1 + elif bin_number == -1: + bin_number = 0 + elif bin_number > nbins or bin_number < -1: + raise ValueError("Walker out of boundary.") + + # Assign to bin within the full dimensional space + bin_id += bin_number * np.prod(nbins_per_dim[:n]) + + # Output is the main list that, for each segment, holds the bin assignment + output[i] = bin_id + return n_bottleneck_filled + + +def log_bin_boundaries( + skip, + bottleneck, + direction, + bin_log_path, + minlist, + maxlist, + nbins_per_dim, + n_bottleneck_filled, + bottlenecks_forward, + bottlenecks_reverse, +): + ndim = len(nbins_per_dim) + skip = np.array([bool(s) for s in skip]) + active_dims = np.array([n for n in range(ndim) if not skip[n]]) + max_bottleneck = np.sum([1 if direction[n] in [-1, 1] else 2 for n in active_dims]) if bottleneck else 0 + with open(expandvars(bin_log_path), 'a') as bb_file: + # Iteration Number + bb_file.write(f'Iteration: {westpa.rc.sim_manager.n_iter}\n') + bb_file.write('MAB linear bin boundaries: ') + for n in range(ndim): + # Write binbounds per dim + bb_file.write(f'{np.linspace(minlist[n], maxlist[n], nbins_per_dim[n] + 1)}\t') + # Min/Max pcoord + bb_file.write(f'\nLagging pcoord in each dimension: {minlist}\n') + bb_file.write(f'Leading pcoord in each dimension: {maxlist}\n') + # Bottlenecks bins exist + if bottleneck: + bb_file.write(f'Number of bottleneck bins filled: {n_bottleneck_filled} / {max_bottleneck}\n') + for n in active_dims: + if direction[n] in [0, 1, 86]: + bb_file.write(f'Dimension {n} forward bottleneck walker at: {list(bottlenecks_forward[n])}\n') + if direction[n] in [0, -1, 86]: + bb_file.write(f'Dimension {n} backward bottleneck walker at: {list(bottlenecks_reverse[n])}\n') + bb_file.write('\n') + else: + bb_file.write('\n') diff --git a/tests/refs/mab_assignments_ref.h5 b/tests/refs/mab_assignments_ref.h5 new file mode 100644 index 0000000000000000000000000000000000000000..a21919e4374421c87274b47cf1897e6acfc8e944 GIT binary patch literal 159776 zcmeI5&yHMY5yg8vF$@U-CoBlzpO@JnLK282HbIJ!VUeID5?%laQX+!H0>z04uwfZk z`4PNi=|`|wv5a^EKZ4&N%$@K4EYF!+T~)XHo9^)~N1EfRI$u@yxpk|0#+mMa-gy1z zPe1wmldIv+(b4LO)s>UK^5u*1&(%L)I&onWe{cND!HA!Y`0qwv9i05PT0J-VpBeKj z@13`8} ztIt-SwD_Ixt>@kUVY?BY`~Fy9gty;#^Yw$(;lufRckEa013fpk>FVm(-|)2GnZut+ zK7G8WAI-(J4px_sx6|>TQILaC(Sw8L_|NDE50CfU;JR`wXw&qy;q-BqqZ9w7OP5Yg z9F2Jn5B~9$6T<4s*BbKmHhpPZ9gEbdC zf7Q)N?dfU7OXu^|IXaJ(r*k)IbA2^m)Q;9sUe#@VRXR`Wp#2iH>8sv*PxDi^|IZn2 zk3S*07(Zv|ug9Gqv<@mg1|1F$?*6p&7=C8gegJiNaPJqT$D?|D{_BgRM|-TR)yE&) z`uN?CKDhPCpFVzf__v=&?H`H#lGrbc{fgK>7W*e+zj|!*`k0?@9=j(0^q+V4KWK$7 z#=PU-P5AjN`rRAUSy`EFo7yXA=L;psex+Nv}8q8P2CysF!7 zmeP4z2kn=rt$o#N?`dA@f*S8->pq+q!E0U2+quZ|U0Y-#IJX|>s_ulS6E5_ci|74w z1|BEZH##5TMdyUYtGca;(9=Rozo@@-UWU%S_MYagJvcG^*SMIobCLUZO^M0S+UEe_g?YC(ESUlzi`>8iR)h^?Tx}zBe=4=+Z 
zf2wXw>a4!-cdc{##q-#6e3ZrG&!RqGy+1wMx-UKd(&`?K7mNGX(^cc1o>qKmI`7N# z)$gAyd;BHW*}q?+&$svfBY*9|N2jm<_v~DTuh-YFbech)Ebh8)?ZI{(bq}a*&3(_W z*H`m;wwK?E*B*>rSM;}Z^!&TJzFODXd+OiT9z>VZQ_#1v^AWyYC%@8Z26?hrbx-T) zSy_GI?^6Yu$HoX4NVT|eKY+`r7pr99v2E4@S4+u8dTe(AhkSB;`u7E5PUa=o3^ z7d|@I9$c?4QFO~9I-mDGsy)b+i(&SLb=N`X?K6etBsfl{CpCpgSpagLwsWRagQ z&c$p!_;c%Vjyzf9d=}^af1*6TY(3?ZCySaVYiH}h&(={sd9tW^vNk{ejw1j6Wt4C6 z&TIcsr}p6bd8|?9>|E4*QG0GZ&PD4ekMnb}_Fz-gn_t=aqqUB&dbu7|9_@8YG3u< zf8^J??drzA&V9XJb>kWj%+k5(<>{b(bzZMyt8TkKQN5$z7bvfE-i^+^_aFK7`m(2b zqt`W$%oul&+UUW_|B+woHZ|j4y*X^ft8QH5fobXJ`IoM9bUwn1&I#qI z&g6@t=8M`@x7|mj^Ry1pK2o0cRqy>reyz)%>Wx~rsTu$3&0#BEb>kWjOiM@4zjU3W z^AVoTgQ%@KlP`+VI?AiM?LI1-%XWsWLio73I7FFkHJ$TuA@T+dSkI>OWbj#w>b?&|Y$geuHr+TB-)%ok$ z%-bwh-P(hL0#<)z`w71Gl{XhPo}G{It8VS1K>_V2Z2BaN`_fnJsotp1xA*=dzt*+p zlI3UqWU=bDxnb$5{;a<6m##B&D2mxR1z&ZJ)`Lf$ELPpxzvr=kd+$H;Yd^E6dZX6W z`Rm!t+bmYy+Jl1vR)1yt3BL9fdnSt6Jp{k%wtg<1r*)uDvbZmO#h&Vo`h0uuKk{o` zYc5%S=1&%@Zo6MBUDcn}7yi<9j?QIx**OJYbtYdF$&FYW)f@Fb>b?KS zuXU}tWcistS**HkZdkgiKdUeNrR!|XCCi`PfACdjE5GF@PZq0g?cej*zrFV#`L&B4`>1rD)*;$Q%G17LPxVHv+k5|!U+XqC<6pfwY{jc?T;qXh z>FD{Fu5)xg!qa&WwN+>GMKM}Oc~!UFN2T+$4%#nKTl=c_{v*HEWl!}+t=rU$fA!|D z6|cH+jR&Tsqvv0`&e8b@FFGfbr#h1_ikdHKSKW3WmCn;TMEgj2+E?tU-l%nZ??3Wu z-KJ*zt2c+Oc-4(-JTNUCJ^#{mj?PDTIuD|@>P)^UM(ZfA>bCo+be`5h`z2~?U-jO9 zj-W;~#RX48jz_fJq{7ct4Iv?Rh=Y;Z9XYxf+^F{5d+wP;%d0K~P zA1P1!iapgEwQleIM}Do_)Qo@i=CBp7x^ayMrlq6jU%JlG`3O(vLDW{A$rr_F9pzQs zb|00_(>iFsL~ZS>-usXIT9-Z58?~0U>{Mp}Q zhp#$Y`7J+rvRHN7ePrpX{;a<6m##B=syC|6z4ssawJvivip<+AR^8fzg928cW%~)f z_La^-&t~prvFg@78WgblE89=-_oc6T??3YE^JP!)`MSlYyY0d{$)?~M(yX``;YuuSLd&1GjFq4b!!g}3RwM> z?I-x!SJvLK{Op%3R^8Ummagj0>I=Vg-i^+^_aFK7`m(2bqt>2fym>W-sXV^K7nH7Wd`)vZs2ZKHuK^kNjF!=dWio zZ?jl+YYz?zSpAjlC-~Y|-h9${c22>sy0wo61+<^A>60w(OJDWgf8^Ka%bx0uTGyIO zmY?~P#j4xphNY|ev--kcy3Wj@C}!sreAPKx4<324Saoavp2z-WPxVIa=id8|{94!g zoRw|+t8Tl0x2>z@E1gT{K>_WHP3`KdW%X6>{YQT7EA~`x)ViB~9!vCm)ou4tRIjI2 zJ|pm=@3)~m)tP)z)O=C9>bCo+be`5B+DFRMzUsaI$gg$TQ@v5^=B9JK-?y^skGHk@=h}LGd+$H;Yu((msy!H; z)1`B+v!c2xPj%M$>)ASYqjuHZ&3yK*r^ch#Q+Z3jzH|3l_MGa>p6ZQSH#e747VsIJQU+F0hP5q180w$9zCt=HG9qw#1RNUptFcx81*-% z=ec_=droy`PxVHv%iN73^EQiBxAx$mfYoQ&euA%k<;_KnGxxGsb!#6D3TQuJXZs2M zzVuagpUvg#^Xv={J@{3( z_V0P@-?@7&drteAJ=GhvuFhZ2X5MD8>ee0{6tMa$+fVSduh=tDWWQvw>b8C^ou_rk z_7nVl>8tEMo6Fbd+k5|!U+Y?P$?`LQvRHN7{bK2={;a<6m#%YkF2iH~WKne{Ulgp3360tu0p3b6QvDuV*uFvsiU&4-N`g{gv$}_}W+2 zJhc4T`3S%2wtlvBRX@6Av2@;z&e?r7m#^11y2m!T0XZ3}@ zbe(nndUkd$!&jZXIvOWW7OU=V_JUqN&(2;~`1^8w=kB%aIeosH?ziZ9t-I;xu|&^P z_nFqlTmM`b>v2$*-!HRxQU8_Tx&33E+n2t|?z6dk zeZJ8>wkc2R^0_L#vFf(DQ95r|hw7_&de-MIt=DboeKmKlWzVV3o9?&hd9B;JA^uh0 z&m(GA-MGdBGpeidmaen)z9N=C`u-xyQ=P5+mS6Kl?W)`EBTHBHM|D-+(sj=6v$=fL zIl9L-->w_Rk!xhpn%n1(LPe%zVwx~*XnbZ zTHkwf$%^asZQ0^qt-PMU>c%x5n4X?id|!07=91;tITW?^`C9ocf3%MBs&4CNOIP)) zeo=erI$L{i+kVw~)9>@5=e2HbN?!aqS_V2>}Ve;Py1@y`_9Uzb)$Q1Q=ZmcKNZhLy?JQGtL|AX*Lf@*J^#{mW*$UQ z=Rwp~oul>OMe8W9>bAQE9X(Y2qW02tw)WPx{i^e(`z?B2>#m!{Q!#H2Tk)zprKat( zboBg7)44u(sr8Vz4~R$ifbz7j$QMP;7qzSIZm#dP_nnnbuWxjZZOYTS`up3S&F^bx zvFf&d_Vgv5)ffKKb@qP0S>ycvau!u*&Fk6Odhn}myN^75iKAN&*}?*={j3;$?|9C6nxd$%5V9}lf|lA`}aKdueGU9t!vFC%g_ADV%2T^Z0V~0tiJGzJm_4U;y*;4u=pImB)!ofr(Cg>fdflS- zzFc2xK3M+jynwII*UE4C$&0CMw3eXSRLhUQ-`?qcTsr$}b*RK!nR`!MbX;=IB>r3r7RbP1M zsJ|>pfw$jy^Yw$(;fj9w>~HIz{c~gZ#>@PM^E97+y7|g_-t_5<=RH_mZttBsz7fH} zsws7F&>S~M8NkEUl@tDzW8s_jubn*U%g5Gm;?ku{CkKzlJckE&e%VsoN;Rnf&ZSlwtUwPOX=NzbUuP%Ie{G0>7 z)`h1CA3rwdd3+kj29=LZKILI6a!%_ik2uKB*)1Mx}z_C zkUSvgLCz899C7^E#PMU32cBnVUtdwjLh4y@_0w*i;B#y1<#o@yP z@hfT^TT#zhHs^?AgX+gt^!Q#pn@{w7@Im~F#4S`lHhi_! 
z@5Pm`Hs?X&7QzGJVT0<&2H}DDLHrQ*RxV|3HI6VA{RvbTQo%7;) z4%<6NT#@sN77sssY(?T(BoF7*RzGpYD35dX`l>009_R<+2R%P=kaHk@5Wj`+KzP`S z#6jZNAbt-mzULeA?Nu260kw*9FIfmU~61$c%a6qi(iq)$9a%*#6ix(1Br7EBo5*S)lVM$ zAaywh5(n{v*r4(`2f`yi$oC2G@RQFv zr*S>cIs72`u`M3?;VTbY%I6$@cp!P;f%rKG>N#xAf%rlEp!&(Lar_{03*mwKJiNN(g-0Iz znor~KIIlK*5T4rjIZs_|5T3?0AAFE=@bH7gLFHL~>%8^&yu8FYk4=62AoW4`Am@m4 zjyQg7;`p)21JAShzMDMovB?AC2dRr~@rZ+*tJhaeadbgH5I?AXbi@zpb=33l@Pp*T zh6h@Hjgv?HdY;D#4;v&8Hasg2evtE=(>&Oq@;L{>BR^QLubMLGgf7_i`s%bk=t95I zSNK76B(8k?%I6$@cp!P;f%rKG>N#xAf%rlEp!&(Lar_{03*mwKJiNN(g-0Iznor~K zIIlK*5T4rjIZs_|5T3?0AAFE=@bH7gLFHL~>%8^&yu8FYk4=62AoW4`Am@m4jyQg7 P;`p)21JARwudn_O(iLX& literal 0 HcmV?d00001 diff --git a/tests/test_binning.py b/tests/test_binning.py index 77712205d..f9de81ee2 100644 --- a/tests/test_binning.py +++ b/tests/test_binning.py @@ -1,5 +1,7 @@ import pytest +import h5py +import os import numpy as np from scipy.spatial.distance import cdist @@ -13,6 +15,10 @@ ) from westpa.core.binning.assign import coord_dtype from westpa.core.binning.mab import MABBinMapper +from westpa.core.binning.mab import map_mab + + +REFERENCE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'refs') class TestRectilinearBinMapper: @@ -327,6 +333,102 @@ def test2dRectilinearRecursion(self): assert (assignments == expected).all() +@pytest.fixture() +def test_input_mab_data(): + # Create synthetic test data: a 2D grid of points with Gaussian weights distribution + N_point_2d = 15 + n_dim_2d = 2 + N_total_2d = N_point_2d**n_dim_2d + coords_2d = np.zeros((N_total_2d, 2)) + for i in range(N_total_2d): + coords_2d[i, 0] = i % N_point_2d + coords_2d[i, 1] = i // N_point_2d + coords_2d /= N_point_2d + + # generate weights as a 2D gaussian where the max is at the center of the 2D space + weights_2d = np.zeros(N_total_2d) + for i in range(N_total_2d): + weights_2d[i] = np.exp(-((coords_2d[i, 0] - 1 / 2) ** 2 + (coords_2d[i, 1] - 1 / 2) ** 2) / (1 / 2) ** 2) + weights_2d /= np.sum(weights_2d) + + # Tile coords + allcoords_2d_grid = np.ones((N_total_2d, 4)) + allcoords_2d_grid[:, :2] = coords_2d + allcoords_2d_grid[:, 2] = weights_2d + allcoords_2d_grid = np.tile(allcoords_2d_grid, (2, 1)) + allcoords_2d_grid[0:N_total_2d, 3] = 0 + + # Create deterministic 3D array of grid points, again with 3D Gaussian distribution of weights + N_point_3d = 15 + n_dim_3d = 3 + N_total_3d = N_point_3d**n_dim_3d + coords_3d = np.zeros((N_total_3d, 3)) + for i in range(N_total_3d): + coords_3d[i, 0] = i % N_point_3d + coords_3d[i, 1] = (i // N_point_3d) % N_point_3d + coords_3d[i, 2] = i // (N_point_3d**2) + coords_3d /= N_point_3d + + # Generate weights as a n_dim gaussian where the max is at the center of the 3D space + weights_3d = np.zeros(N_total_3d) + for i in range(N_total_3d): + weights_3d[i] = np.exp( + -((coords_3d[i, 0] - 1 / 2) ** 2 + (coords_3d[i, 1] - 1 / 2) ** 2 + (coords_3d[i, 2] - 1 / 2) ** 2) / (1 / 2) ** 2 + ) + weights_3d /= np.sum(weights_3d) + + # Tile coords + allcoords_3d_grid = np.ones((N_total_3d, n_dim_3d + 2)) + allcoords_3d_grid[:, :n_dim_3d] = coords_3d + allcoords_3d_grid[:, n_dim_3d] = weights_3d + allcoords_3d_grid = np.tile(allcoords_3d_grid, (2, 1)) + allcoords_3d_grid[0:N_total_3d, n_dim_3d + 1] = 0 + + # Lastly, test bin assignment with 2D coordinates and weights on pseudorandom deterministic Gaussian points + N_total_random = 300 + np.random.seed(0) + coords_random = np.random.normal(loc=[0.5, 0.5], scale=0.25, size=(N_total_random, n_dim_2d)) + + # Generate weights as a n_dim sine curve with given wavelength 
plus some deterministic noise + weights_random = np.zeros(N_total_random) + wavelength = 0.25 + noise_level = 0.1 + for i in range(n_dim_2d): + weights_random += np.sin(2 * np.pi * coords_random[:, i] / wavelength) + noise_level * np.cos( + 4 * 2 * np.pi * coords_random[:, i] / wavelength + ) + weights_random = np.abs(weights_random) / np.sum(np.abs(weights_random)) + + # Tile coords + allcoords_2d_gauss = np.ones((N_total_random, 4)) + allcoords_2d_gauss[:, :2] = coords_random + allcoords_2d_gauss[:, 2] = weights_random + allcoords_2d_gauss = np.tile(allcoords_2d_gauss, (2, 1)) + allcoords_2d_gauss[0:N_total_random, 3] = 0 + + return { + 'allcoords_2d_grid': allcoords_2d_grid, + 'allcoords_3d_grid': allcoords_3d_grid, + 'allcoords_2d_gauss': allcoords_2d_gauss, + } + + +@pytest.fixture() +def ref_mab_results(): + test_refs = {'2d_grid': [], '3d_grid': [], '2d_gauss': []} + with h5py.File(os.path.join(REFERENCE_PATH, 'mab_assignments_ref.h5'), 'r') as f: + n_tests_2d_grid = len(f['2d_grid'].keys()) + for i in range(n_tests_2d_grid): + test_refs['2d_grid'].append(f[f'2d_grid/test_result_{i:d}'][:]) + n_tests_3d_grid = len(f['3d_grid'].keys()) + for i in range(n_tests_3d_grid): + test_refs['3d_grid'].append(f[f'3d_grid/test_result_{i:d}'][:]) + n_tests_2d_gauss = len(f['2d_gauss'].keys()) + for i in range(n_tests_2d_gauss): + test_refs['2d_gauss'].append(f[f'2d_gauss/test_result_{i:d}'][:]) + return test_refs + + class TestMABBinMapper: def test_init(self): mab = MABBinMapper([5]) @@ -335,3 +437,221 @@ def test_init(self): def test_determine_total_bins(self): mab = MABBinMapper([5]) assert mab.determine_total_bins(nbins_per_dim=[5, 1], direction=[1, 86], skip=[0, 0], bottleneck=True) == 9 + assert mab.determine_total_bins(nbins_per_dim=[5, 5], direction=[0, 0], skip=[0, 0], bottleneck=True) == 33 + assert mab.determine_total_bins(nbins_per_dim=[5, 5, 5], direction=[0, 0, 0], skip=[0, 0, 0], bottleneck=True) == 137 + assert mab.determine_total_bins(nbins_per_dim=[5, 5], direction=[0, 0], skip=[1, 0], bottleneck=True) == 9 + assert mab.determine_total_bins(nbins_per_dim=[5, 1], direction=[1, 86], skip=[0, 0], bottleneck=False) == 6 + + @pytest.mark.parametrize( + "nbins_per_dim, direction, bottleneck, skip, ref_index", + [ + ([2, 2], [1, 1], False, [0, 0], 0), + ([2, 2], [0, 0], False, [0, 0], 1), + ([2, 2], [-1, -1], True, [0, 0], 2), + ([2, 2], [0, 0], True, [0, 0], 3), + ([2, 2], [0, 0], True, [0, 1], 4), + ([2, 2], [0, 0], True, [1, 1], 5), + ([2, 2], [86, 0], True, [0, 0], 6), + ([2, 2], [86, 86], False, [0, 0], 7), + ], + ) + def test_2d_grid_mab_bin_assignments( + self, test_input_mab_data, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index + ): + allcoords = test_input_mab_data['allcoords_2d_grid'] + N_total = allcoords.shape[0] // 2 + mask = np.full((N_total * 2), True) + output = list(np.zeros((N_total * 2), dtype=int)) + output = map_mab( + coords=allcoords, + mask=mask, + output=output, + nbins_per_dim=nbins_per_dim, + direction=direction, + bottleneck=bottleneck, + skip=skip, + ) + assert output[:N_total] == output[N_total:], "Expected first half of bin assignments to equal second half" + assert output == list( + ref_mab_results['2d_grid'][ref_index] + ), f"Unexpected 2D grid MAB bin assignments with direction={direction}, bottleneck={bottleneck}, and skip={skip}" + + @pytest.mark.parametrize( + "nbins_per_dim, direction, bottleneck, skip, ref_index", + [ + ([2, 2, 2], [0, 0, 0], False, [0, 0, 0], 0), + ([2, 2, 2], [0, 0, 0], True, [0, 0, 0], 1), + ], + ) + def 
test_3d_grid_mab_bin_assignments( + self, test_input_mab_data, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index + ): + allcoords = test_input_mab_data['allcoords_3d_grid'] + N_total = allcoords.shape[0] // 2 + mask = np.full((N_total * 2), True) + output = list(np.zeros((N_total * 2), dtype=int)) + output = map_mab( + coords=allcoords, + mask=mask, + output=output, + nbins_per_dim=nbins_per_dim, + direction=direction, + bottleneck=bottleneck, + skip=skip, + ) + assert output[:N_total] == output[N_total:], "Expected first half of bin assignments to equal second half" + assert output == list( + ref_mab_results['3d_grid'][ref_index] + ), f"Unexpected 3D grid MAB bin assignments with direction={direction}, bottleneck={bottleneck}, and skip={skip}" + + @pytest.mark.parametrize( + "nbins_per_dim, direction, bottleneck, skip, ref_index", + [ + ([2, 2], [0, 0], True, [0, 0], 0), + ([2, 2], [0, 0], True, [0, 1], 1), + ([2, 2], [86, -1], True, [0, 0], 2), + ], + ) + def test_2d_gaussian_mab_bin_assignments( + self, test_input_mab_data, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index + ): + allcoords = test_input_mab_data['allcoords_2d_gauss'] + N_total = allcoords.shape[0] // 2 + mask = np.full((N_total * 2), True) + output = list(np.zeros((N_total * 2), dtype=int)) + output = map_mab( + coords=allcoords, + mask=mask, + output=output, + nbins_per_dim=nbins_per_dim, + direction=direction, + bottleneck=bottleneck, + skip=skip, + ) + assert output[:N_total] == output[N_total:], "Expected first half of bin assignments to equal second half" + assert output == list( + ref_mab_results['2d_gauss'][ref_index] + ), f"Unexpected 2D Gaussian MAB bin assignments with direction={direction}, bottleneck={bottleneck}, and skip={skip}" + + +if __name__ == "__main__": + import matplotlib.pyplot as plt + + # Run as a script to regenerate the MAB binning reference data and figures + # Comment out the fixture tag on test_input_mab_data() to do this + input_data = test_input_mab_data() + with h5py.File(os.path.join(REFERENCE_PATH, 'mab_assignments_ref.h5'), 'w') as f: + # 2D Grid + for i, (nbins_per_dim, direction, bottleneck, skip) in enumerate( + [ + ([2, 2], [1, 1], False, [0, 0]), + ([2, 2], [0, 0], False, [0, 0]), + ([2, 2], [-1, -1], True, [0, 0]), + ([2, 2], [0, 0], True, [0, 0]), + ([2, 2], [0, 0], True, [0, 1]), + ([2, 2], [0, 0], True, [1, 1]), + ([2, 2], [86, 0], True, [0, 0]), + ([2, 2], [86, 86], False, [0, 0]), + ] + ): + allcoords = input_data['allcoords_2d_grid'] + N_total = allcoords.shape[0] // 2 + mask = np.full((N_total * 2), True) + output = list(np.zeros((N_total * 2), dtype=int)) + output = map_mab( + coords=allcoords, + mask=mask, + output=output, + nbins_per_dim=nbins_per_dim, + direction=direction, + bottleneck=bottleneck, + skip=skip, + ) + + f.create_dataset(f'2d_grid/test_result_{i:d}', data=output) + + # Create a cmap with the same number of colors as the number of bins + cmap = plt.cm.get_cmap('tab20', int(np.max(output) + 1)) + + # Plot the synthetic data in 2D using a scatter plot + # Include a cbar to shown the bin assignments + plt.scatter( + allcoords[:N_total, 0], + allcoords[:N_total, 1], + s=allcoords[:N_total, 2] * 10000, + c=output[:N_total], + cmap=cmap, + vmin=-0.5, + vmax=int(np.max(output)) + 0.5, + ) + plt.colorbar(label='Bin ID', ticks=range(int(np.max(output) + 1))) + plt.title(f'nbins_per_dim={nbins_per_dim}, direction={direction},\n bottleneck={bottleneck}, skip={skip}') + plt.savefig(f'2d_grid_ref_result_{i}.png') + plt.clf() + 
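+        # Each reference is stored as a '<case>/test_result_<i>' dataset,
+        # matching the keys read back by the ref_mab_results fixture
+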
+ # 3D grid + for i, (nbins_per_dim, direction, bottleneck, skip) in enumerate( + [ + ([2, 2, 2], [0, 0, 0], False, [0, 0, 0]), + ([2, 2, 2], [0, 0, 0], True, [0, 0, 0]), + ] + ): + allcoords = input_data['allcoords_3d_grid'] + N_total = allcoords.shape[0] // 2 + mask = np.full((N_total * 2), True) + output = list(np.zeros((N_total * 2), dtype=int)) + output = map_mab( + coords=allcoords, + mask=mask, + output=output, + nbins_per_dim=nbins_per_dim, + direction=direction, + bottleneck=bottleneck, + skip=skip, + ) + + f.create_dataset(f'3d_grid/test_result_{i:d}', data=output) + + # 2D Gaussian + for i, (nbins_per_dim, direction, bottleneck, skip) in enumerate( + [ + ([2, 2], [0, 0], True, [0, 0]), + ([2, 2], [0, 0], True, [0, 1]), + ([2, 2], [86, -1], True, [0, 0]), + ] + ): + allcoords = input_data['allcoords_2d_gauss'] + N_total = allcoords.shape[0] // 2 + mask = np.full((N_total * 2), True) + output = list(np.zeros((N_total * 2), dtype=int)) + output = map_mab( + coords=allcoords, + mask=mask, + output=output, + nbins_per_dim=nbins_per_dim, + direction=direction, + bottleneck=bottleneck, + skip=skip, + ) + + f.create_dataset(f'2d_gauss/test_result_{i:d}', data=output) + + # Create a cmap with the same number of colors as the number of bins + cmap = plt.cm.get_cmap('tab20', int(np.max(output) + 1)) + + # Plot the synthetic data in 2D using a scatter plot + # Include a cbar to shown the bin assignments + plt.scatter( + allcoords[:N_total, 0], + allcoords[:N_total, 1], + s=allcoords[:N_total, 2] * 10000, + c=output[:N_total], + cmap=cmap, + vmin=-0.5, + vmax=int(np.max(output)) + 0.5, + ) + plt.colorbar(label='Bin ID', ticks=range(int(np.max(output) + 1))) + plt.title(f'nbins_per_dim={nbins_per_dim}, direction={direction},\n bottleneck={bottleneck}, skip={skip}') + plt.savefig(f'2d_gauss_ref_result_{i}.png') + plt.clf() + print("Reference data generated and saved to file.") From 33e6adbd3219338a0b25f48395e53dc1a5603068 Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:56:24 -0400 Subject: [PATCH 04/14] allow reruns on fluxanl test (#446) --- tests/test_tools/common.py | 5 +++++ tests/test_tools/test_w_fluxanl.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/tests/test_tools/common.py b/tests/test_tools/common.py index bb73340e4..0bb8d1eb2 100755 --- a/tests/test_tools/common.py +++ b/tests/test_tools/common.py @@ -1,5 +1,10 @@ '''Module of common tool test class''' +import sys +import pytest + +flaky_on_macos = pytest.mark.flaky(condition=sys.platform.startswith('darwin'), reruns=5, reason='flaky on macos') + # Placeholder class that will set all kwargs as attributes class MockArgs: diff --git a/tests/test_tools/test_w_fluxanl.py b/tests/test_tools/test_w_fluxanl.py index d28a9f246..97ba24443 100755 --- a/tests/test_tools/test_w_fluxanl.py +++ b/tests/test_tools/test_w_fluxanl.py @@ -3,6 +3,7 @@ import h5py import numpy +from common import flaky_on_macos from westpa.cli.tools.w_fluxanl import WFluxanlTool @@ -90,6 +91,7 @@ def check_output(self, outfile, **kwargs): class Test_W_Fluxanl_System: '''System level tests for w_fluxanl''' + @flaky_on_macos def test_alpha(self, ref_50iter): '''Confidence interval size decreases as alpha increases''' From d84dfffbcc0311e8ff2422d4ed326c5753d505d3 Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. 
Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:56:40 -0400 Subject: [PATCH 05/14] Clean up new MAB tests (#445) * refactor mab_tests * update the code for generating test data * docstring update * update docstrings * output test files in mab_tests --- tests/conftest.py | 15 ++- tests/refs/mab_assignments_ref.h5 | Bin 159776 -> 159776 bytes tests/test_binning.py | 161 +++++++++++++++++++----------- 3 files changed, 109 insertions(+), 67 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7aff9291d..57d6be85f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,8 @@ import pytest import os import glob - -from shutil import copyfile, copy import tempfile +from shutil import copyfile, copy import westpa @@ -24,6 +23,12 @@ def copy_ref(dest_dir): copy(filename, dest_dir) +def clear_state(): + os.chdir(STARTING_PATH) + del os.environ['WEST_SIM_ROOT'] + westpa.rc = westpa.core._rc.WESTRC() + + @pytest.fixture def ref_3iter(request): """ @@ -224,9 +229,3 @@ def ref_executable(request, tmpdir): westpa.rc = westpa.core._rc.WESTRC() request.addfinalizer(clear_state) - - -def clear_state(): - os.chdir(STARTING_PATH) - del os.environ['WEST_SIM_ROOT'] - westpa.rc = westpa.core._rc.WESTRC() diff --git a/tests/refs/mab_assignments_ref.h5 b/tests/refs/mab_assignments_ref.h5 index a21919e4374421c87274b47cf1897e6acfc8e944..97682ba9b82c3bd665f16109612186f27d8d4595 100644 GIT binary patch delta 2930 zcmeHJy^7mF5aza82TZ0^cPdqGcd~u46#|DN*%Prz}jIjU+gj~E8_%L0o&~C1Xi!m%Y>T;06Kjd6)p_H@C?izJ; zQ*e6$d&Nt5G;d*wUO<;_<~xT@7bPBXmqWCfdWWVMv3VotOqm{l#USKvN849@2E)SL z2L(HZ=}IDy6$(%KZ7kd(XVq;t;J`(u5>B#S^MN0S-F)&b>cIMrn;!R{Vtyp8(8l{2 zW~)=pb7`zHibU+ESRlH!WiKlrMzQco_Z jrwB@ZKK`0uj7hI~Sv-^(cFd)`|DCwUPtgJ7l2qq!Uwlj~ delta 3215 zcmeHJy>1gh5SCX$S|eN9j_X*_;PSfIi3vwVh-~K+B$AE;iAYeOdjuCefXPZ8N|3taB+Kkd$aTN?d&(RDBFv&J^j|YFjqTrxo0BD_e~UB z*Gbhkw)I$kCg!PryO+jGwP)}LLs)fm5{2Y23QK20oV)X`6@LsR+0a->SSfa)ZSmPj z);E6s3L45CZ01S=LsImiuXwLZ5x1xNrdL}jCy=HhDZgNrCy-2s#(!Vh#$F_|57_Ct zHe|1D_zjO%yAZ_ygLI)dtS4$h5~{fA8suNVi7A6x^)b^=wb}S&<&?@Q3~)M{e}quN zm3V-_5MfxISV<;WNg2u-8ZK|Vbi^xiMM8w7~QuV+%*XUI*YKR9bp)wJ2>-Zu9%zaKF7 zWWg)oMy4N@Z72CBa7{J=|ICg^GYRjt`oy=yBXc0x7&L}Qs+7Z9b1k&svK#Y`JYTD@ z+!aefrMjaaJNkFfv{}RTk7RjFvngb&alOGyd}Mn16B8e@POKl~DR3?>YvUaMs&f7Y z=a5fSA=r~HC24KEr_PZBG#prL1XQYkC~O=sxQ1cLw#pe(YS2xXdC_5Qo$rlNK+w~O z6#}R{1+XY7TVU@RST9SK<6Fg=B}T4D=q%s=bGdFk3cor}?x9%UEGz#~tm7Od_~8pR zJEppWH~}5~RjEM<=%DoS-#bf&%QsQo(y$X^_vP7aRA>sfc_!OD(;of~I>Akzv6=h> D_BQ^r diff --git a/tests/test_binning.py b/tests/test_binning.py index f9de81ee2..533f98232 100644 --- a/tests/test_binning.py +++ b/tests/test_binning.py @@ -1,7 +1,7 @@ +import os import pytest import h5py -import os import numpy as np from scipy.spatial.distance import cdist @@ -14,8 +14,7 @@ RecursiveBinMapper, ) from westpa.core.binning.assign import coord_dtype -from westpa.core.binning.mab import MABBinMapper -from westpa.core.binning.mab import map_mab +from westpa.core.binning.mab import MABBinMapper, map_mab REFERENCE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'refs') @@ -333,8 +332,15 @@ def test2dRectilinearRecursion(self): assert (assignments == expected).all() -@pytest.fixture() -def test_input_mab_data(): +# Following section is for MAB Testing +@pytest.fixture(scope='class') +def gen_input_mab_data_fixture(request): + '''A fixture to assign `gen_input_mab_data`'s return as a class attribute for tests.''' + request.cls.input_mab_data = gen_input_mab_data() + + +def gen_input_mab_data(): + '''Function to 
generate test data for MABBinMapper Testing.''' # Create synthetic test data: a 2D grid of points with Gaussian weights distribution N_point_2d = 15 n_dim_2d = 2 @@ -386,8 +392,8 @@ def test_input_mab_data(): # Lastly, test bin assignment with 2D coordinates and weights on pseudorandom deterministic Gaussian points N_total_random = 300 - np.random.seed(0) - coords_random = np.random.normal(loc=[0.5, 0.5], scale=0.25, size=(N_total_random, n_dim_2d)) + rng = np.random.default_rng(seed=0) + coords_random = rng.normal(loc=[0.5, 0.5], scale=0.25, size=(N_total_random, n_dim_2d)) # Generate weights as a n_dim sine curve with given wavelength plus some deterministic noise weights_random = np.zeros(N_total_random) @@ -413,34 +419,38 @@ def test_input_mab_data(): } -@pytest.fixture() -def ref_mab_results(): - test_refs = {'2d_grid': [], '3d_grid': [], '2d_gauss': []} +@pytest.fixture(scope='class') +def ref_mab_results(request): + '''Function for reading the reference datasets from `refs/mab_assignments_ref.h5`.''' + # Setting up empty things dictionary with corresponding keys + request.cls.ref_mab_results = {} + test_keys = ['2d_grid', '3d_grid', '2d_gauss'] + + # Loading in the three sets of reference data from the reference file into a dictionary with h5py.File(os.path.join(REFERENCE_PATH, 'mab_assignments_ref.h5'), 'r') as f: - n_tests_2d_grid = len(f['2d_grid'].keys()) - for i in range(n_tests_2d_grid): - test_refs['2d_grid'].append(f[f'2d_grid/test_result_{i:d}'][:]) - n_tests_3d_grid = len(f['3d_grid'].keys()) - for i in range(n_tests_3d_grid): - test_refs['3d_grid'].append(f[f'3d_grid/test_result_{i:d}'][:]) - n_tests_2d_gauss = len(f['2d_gauss'].keys()) - for i in range(n_tests_2d_gauss): - test_refs['2d_gauss'].append(f[f'2d_gauss/test_result_{i:d}'][:]) - return test_refs + for key in test_keys: + request.cls.ref_mab_results[key] = [f[f'{key}/test_result_{i:d}'][...] 
for i in range(len(f[key]))] class TestMABBinMapper: - def test_init(self): - mab = MABBinMapper([5]) - assert mab.nbins == 9 - - def test_determine_total_bins(self): - mab = MABBinMapper([5]) - assert mab.determine_total_bins(nbins_per_dim=[5, 1], direction=[1, 86], skip=[0, 0], bottleneck=True) == 9 - assert mab.determine_total_bins(nbins_per_dim=[5, 5], direction=[0, 0], skip=[0, 0], bottleneck=True) == 33 - assert mab.determine_total_bins(nbins_per_dim=[5, 5, 5], direction=[0, 0, 0], skip=[0, 0, 0], bottleneck=True) == 137 - assert mab.determine_total_bins(nbins_per_dim=[5, 5], direction=[0, 0], skip=[1, 0], bottleneck=True) == 9 - assert mab.determine_total_bins(nbins_per_dim=[5, 1], direction=[1, 86], skip=[0, 0], bottleneck=False) == 6 + '''Test class for MABBinMapper''' + + @pytest.mark.parametrize( + 'nbins, direction, skip, bottleneck, ref_value', + [ + ([5, 1], [1, 86], [0, 0], True, 9), + ([5, 5], [0, 0], [0, 0], True, 33), + ([5, 5, 5], [0, 0, 0], [0, 0, 0], True, 137), + ([5, 5], [0, 0], [1, 0], True, 9), + ([5, 1], [1, 86], [0, 0], False, 6), + ], + ids=['5x1, direction=[1,86]', '5x5', '5x5x5', '5x5, skip=[1,0]', '5x1, direction=[1,86], no bottleneck'], + ) + def test_determine_total_bins(self, nbins, direction, skip, bottleneck, ref_value): + '''Runs through different configurations to see if mab reports the correct number of total bins''' + mab = MABBinMapper([5]) # Creating a dummy MABBinMapper + + assert mab.determine_total_bins(nbins_per_dim=nbins, direction=direction, skip=skip, bottleneck=bottleneck) == ref_value @pytest.mark.parametrize( "nbins_per_dim, direction, bottleneck, skip, ref_index", @@ -454,14 +464,25 @@ def test_determine_total_bins(self): ([2, 2], [86, 0], True, [0, 0], 6), ([2, 2], [86, 86], False, [0, 0], 7), ], + ids=[ + 'direction=[1,1], no bottleneck', + 'direction=[0,0], no bottleneck', + 'direction=[-1,-1]', + 'direction=[0,0]', + 'direction=[0,0], skip=[0,1]', + 'direction=[0,0], skip=[1,1]', + 'direction=[86,0]', + 'direction=[86,86], no bottleneck', + ], ) - def test_2d_grid_mab_bin_assignments( - self, test_input_mab_data, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index + def test_2x2_2d_grid_mab_bin_assignments( + self, gen_input_mab_data_fixture, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index ): - allcoords = test_input_mab_data['allcoords_2d_grid'] + '''Test MABBinMapper with 2x2 linear section on 2D space''' + allcoords = self.input_mab_data['allcoords_2d_grid'] N_total = allcoords.shape[0] // 2 mask = np.full((N_total * 2), True) - output = list(np.zeros((N_total * 2), dtype=int)) + output = np.zeros((N_total * 2), dtype=int) output = map_mab( coords=allcoords, mask=mask, @@ -471,9 +492,9 @@ def test_2d_grid_mab_bin_assignments( bottleneck=bottleneck, skip=skip, ) - assert output[:N_total] == output[N_total:], "Expected first half of bin assignments to equal second half" - assert output == list( - ref_mab_results['2d_grid'][ref_index] + assert np.all(output[:N_total] == output[N_total:]), "Expected first half of bin assignments to equal second half" + assert np.all( + output == self.ref_mab_results['2d_grid'][ref_index] ), f"Unexpected 2D grid MAB bin assignments with direction={direction}, bottleneck={bottleneck}, and skip={skip}" @pytest.mark.parametrize( @@ -482,11 +503,16 @@ def test_2d_grid_mab_bin_assignments( ([2, 2, 2], [0, 0, 0], False, [0, 0, 0], 0), ([2, 2, 2], [0, 0, 0], True, [0, 0, 0], 1), ], + ids=[ + 'no bottleneck', + 'with bottleneck', + ], ) - def 
test_3d_grid_mab_bin_assignments( - self, test_input_mab_data, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index + def test_2x2x2_3d_grid_mab_bin_assignments( + self, gen_input_mab_data_fixture, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index ): - allcoords = test_input_mab_data['allcoords_3d_grid'] + '''Test MABBinMapper with 2x2x2 linear section on 3D space''' + allcoords = self.input_mab_data['allcoords_3d_grid'] N_total = allcoords.shape[0] // 2 mask = np.full((N_total * 2), True) output = list(np.zeros((N_total * 2), dtype=int)) @@ -501,7 +527,7 @@ def test_3d_grid_mab_bin_assignments( ) assert output[:N_total] == output[N_total:], "Expected first half of bin assignments to equal second half" assert output == list( - ref_mab_results['3d_grid'][ref_index] + self.ref_mab_results['3d_grid'][ref_index] ), f"Unexpected 3D grid MAB bin assignments with direction={direction}, bottleneck={bottleneck}, and skip={skip}" @pytest.mark.parametrize( @@ -511,14 +537,20 @@ def test_3d_grid_mab_bin_assignments( ([2, 2], [0, 0], True, [0, 1], 1), ([2, 2], [86, -1], True, [0, 0], 2), ], + ids=[ + 'direction=[0,0], no skip', + 'direction=[0,0], skip=[0,1]', + 'direction=[86,-1], no skip', + ], ) - def test_2d_gaussian_mab_bin_assignments( - self, test_input_mab_data, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index + def test_2x2_2d_gaussian_mab_bin_assignments( + self, gen_input_mab_data_fixture, ref_mab_results, nbins_per_dim, direction, bottleneck, skip, ref_index ): - allcoords = test_input_mab_data['allcoords_2d_gauss'] + '''Test MABBinMapper with 2x2 linear section on a 2D Gaussian space''' + allcoords = self.input_mab_data['allcoords_2d_gauss'] N_total = allcoords.shape[0] // 2 mask = np.full((N_total * 2), True) - output = list(np.zeros((N_total * 2), dtype=int)) + output = np.zeros((N_total * 2), dtype=int) output = map_mab( coords=allcoords, mask=mask, @@ -528,19 +560,30 @@ def test_2d_gaussian_mab_bin_assignments( bottleneck=bottleneck, skip=skip, ) - assert output[:N_total] == output[N_total:], "Expected first half of bin assignments to equal second half" - assert output == list( - ref_mab_results['2d_gauss'][ref_index] + assert np.all(output[:N_total] == output[N_total:]), "Expected first half of bin assignments to equal second half" + assert np.all( + output == self.ref_mab_results['2d_gauss'][ref_index] ), f"Unexpected 2D Gaussian MAB bin assignments with direction={direction}, bottleneck={bottleneck}, and skip={skip}" -if __name__ == "__main__": +def output_mab_reference(): + ''' + Function to generate the reference test files for the MAB tests. + + To run this, run `python -c 'from test_binning import output_mab_reference; output_mab_reference()'` + in the command line (assuming you're in the `tests/` folder). + + It will overwrite the file 'tests/refs/mab_assignments_ref.h5' and create 11 png files in `tests/` visualizing the binning. 
+ ''' import matplotlib.pyplot as plt - # Run as a script to regenerate the MAB binning reference data and figures - # Comment out the fixture tag on test_input_mab_data() to do this - input_data = test_input_mab_data() - with h5py.File(os.path.join(REFERENCE_PATH, 'mab_assignments_ref.h5'), 'w') as f: + try: + os.mkdir('mab_tests') + except FileExistsError: + pass + + input_data = gen_input_mab_data() + with h5py.File(f'{REFERENCE_PATH}/mab_assignments_ref.h5', 'w') as f: # 2D Grid for i, (nbins_per_dim, direction, bottleneck, skip) in enumerate( [ @@ -557,7 +600,7 @@ def test_2d_gaussian_mab_bin_assignments( allcoords = input_data['allcoords_2d_grid'] N_total = allcoords.shape[0] // 2 mask = np.full((N_total * 2), True) - output = list(np.zeros((N_total * 2), dtype=int)) + output = np.zeros((N_total * 2), dtype=int) output = map_mab( coords=allcoords, mask=mask, @@ -586,7 +629,7 @@ def test_2d_gaussian_mab_bin_assignments( ) plt.colorbar(label='Bin ID', ticks=range(int(np.max(output) + 1))) plt.title(f'nbins_per_dim={nbins_per_dim}, direction={direction},\n bottleneck={bottleneck}, skip={skip}') - plt.savefig(f'2d_grid_ref_result_{i}.png') + plt.savefig(f'mab_tests/2d_grid_ref_result_{i}.png') plt.clf() # 3D grid @@ -599,7 +642,7 @@ def test_2d_gaussian_mab_bin_assignments( allcoords = input_data['allcoords_3d_grid'] N_total = allcoords.shape[0] // 2 mask = np.full((N_total * 2), True) - output = list(np.zeros((N_total * 2), dtype=int)) + output = np.zeros((N_total * 2), dtype=int) output = map_mab( coords=allcoords, mask=mask, @@ -623,7 +666,7 @@ def test_2d_gaussian_mab_bin_assignments( allcoords = input_data['allcoords_2d_gauss'] N_total = allcoords.shape[0] // 2 mask = np.full((N_total * 2), True) - output = list(np.zeros((N_total * 2), dtype=int)) + output = np.zeros((N_total * 2), dtype=int) output = map_mab( coords=allcoords, mask=mask, @@ -652,6 +695,6 @@ def test_2d_gaussian_mab_bin_assignments( ) plt.colorbar(label='Bin ID', ticks=range(int(np.max(output) + 1))) plt.title(f'nbins_per_dim={nbins_per_dim}, direction={direction},\n bottleneck={bottleneck}, skip={skip}') - plt.savefig(f'2d_gauss_ref_result_{i}.png') + plt.savefig(f'mab_tests/2d_gauss_ref_result_{i}.png') plt.clf() print("Reference data generated and saved to file.") From 16397a8598fc838f2f438daff3be360c4f7315ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:01:08 -0400 Subject: [PATCH 06/14] Bump pypa/cibuildwheel from 2.20.0 to 2.21.1 (#447) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.20.0 to 2.21.1. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.20.0...v2.21.1) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0dfa08792..1cdd5b3f0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -36,7 +36,7 @@ jobs: run: "brew install hdf5" - name: Build wheels - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.21.1 env: CIBW_SKIP: "pp* *-musllinux*" CIBW_BUILD: "cp3${{ matrix.python-version }}-*" From e683bbcaa32c11bdc52b80cf1d31a26b6ca670df Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:09:24 -0400 Subject: [PATCH 07/14] [pre-commit.ci] pre-commit autoupdate (#448) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0) - [github.com/psf/black-pre-commit-mirror: 24.8.0 → 24.10.0](https://github.com/psf/black-pre-commit-mirror/compare/24.8.0...24.10.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 556856100..66798992f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: end-of-file-fixer types: [python] @@ -8,7 +8,7 @@ repos: types: [python] - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.8.0 + rev: 24.10.0 hooks: - id: black language_version: python3 From 1af3dcf7c616a6f5c1f84034e49fd0a9e75609aa Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. 
Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:15:08 -0400 Subject: [PATCH 08/14] Dropping Python 3.8 Testing (#449) * drop python 3.8, EOL * Update cibuildwheel version * allow rerun for w_fluxanl test --- .github/workflows/build.yaml | 7 ++----- .github/workflows/test.yaml | 2 +- tests/test_tools/common.py | 2 ++ tests/test_tools/test_w_fluxanl.py | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 1cdd5b3f0..0447b49db 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -24,10 +24,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "macos-13", "macos-latest"] # macos-13 is x86_64, macos-latest is arm64 - python-version: [8, 9, 10, 11, 12] # sub-versions of Python - exclude: - - os: macos-14 - python-version: 8 + python-version: [9, 10, 11, 12] # sub-versions of Python steps: - uses: actions/checkout@v4 @@ -36,7 +33,7 @@ jobs: run: "brew install hdf5" - name: Build wheels - uses: pypa/cibuildwheel@v2.21.1 + uses: pypa/cibuildwheel@v2.21.2 env: CIBW_SKIP: "pp* *-musllinux*" CIBW_BUILD: "cp3${{ matrix.python-version }}-*" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f80221a9a..206f93d5d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -35,7 +35,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "macos-13", "macos-latest"] # macos-13 is x86-64, macos-latest is arm64 - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/tests/test_tools/common.py b/tests/test_tools/common.py index 0bb8d1eb2..c73e9b572 100755 --- a/tests/test_tools/common.py +++ b/tests/test_tools/common.py @@ -5,6 +5,8 @@ flaky_on_macos = pytest.mark.flaky(condition=sys.platform.startswith('darwin'), reruns=5, reason='flaky on macos') +flaky_on_all = pytest.mark.flaky(reruns=5, reason='flaky on all platforms') + # Placeholder class that will set all kwargs as attributes class MockArgs: diff --git a/tests/test_tools/test_w_fluxanl.py b/tests/test_tools/test_w_fluxanl.py index 97ba24443..60c46241c 100755 --- a/tests/test_tools/test_w_fluxanl.py +++ b/tests/test_tools/test_w_fluxanl.py @@ -3,7 +3,7 @@ import h5py import numpy -from common import flaky_on_macos +from common import flaky_on_all from westpa.cli.tools.w_fluxanl import WFluxanlTool @@ -91,7 +91,7 @@ def check_output(self, outfile, **kwargs): class Test_W_Fluxanl_System: '''System level tests for w_fluxanl''' - @flaky_on_macos + @flaky_on_all def test_alpha(self, ref_50iter): '''Confidence interval size decreases as alpha increases''' From d1a101001c4444edc9d09d109eeaa42a7f362f53 Mon Sep 17 00:00:00 2001 From: gma57 <140017072+gma57@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:26:40 -0400 Subject: [PATCH 09/14] Allow WESS and WEED to detect when a simulation has been truncated (#452) * Modified 'wess' attr to a dataset, so additional iterations can be appended * added a check to w_truncate for wess history, which also truncates reweighting events after the cutoff iter * Walked back changes to w_truncate and added truncate logic to wess_driver.py and weed_driver.py * reinstated last_reweighting attribute to ensure backwards compatibility * removed reading in numpy array --- src/westpa/westext/weed/weed_driver.py | 10 ++++++++++ src/westpa/westext/wess/wess_driver.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git 
a/src/westpa/westext/weed/weed_driver.py b/src/westpa/westext/weed/weed_driver.py index 41ac6483e..d0c10fc3a 100644 --- a/src/westpa/westext/weed/weed_driver.py +++ b/src/westpa/westext/weed/weed_driver.py @@ -95,7 +95,15 @@ def prepare_new_iteration(self): with self.data_manager.lock: weed_global_group = self.data_manager.we_h5file.require_group('weed') + reweighting_history_dataset = weed_global_group.require_dataset( + 'reweighting_history', (1,), maxshape=(None,), dtype=int + ) last_reweighting = int(weed_global_group.attrs.get('last_reweighting', 0)) + if last_reweighting > n_iter: + last_reweighting = n_iter - 1 + reweighting_history = reweighting_history_dataset[:] + reweighting_history = reweighting_history[reweighting_history < n_iter] + reweighting_history_dataset.resize((reweighting_history.size), axis=0) if n_iter - last_reweighting < self.reweight_period: # Not time to reweight yet @@ -172,6 +180,8 @@ def prepare_new_iteration(self): for bin, newprob in zip(bins, binprobs): bin.reweight(newprob) + reweighting_history_dataset.resize((reweighting_history_dataset.shape[0] + 1), axis=0) + reweighting_history_dataset[-1] = n_iter weed_global_group.attrs['last_reweighting'] = n_iter assert ( diff --git a/src/westpa/westext/wess/wess_driver.py b/src/westpa/westext/wess/wess_driver.py index 9b523ea78..14c81c021 100644 --- a/src/westpa/westext/wess/wess_driver.py +++ b/src/westpa/westext/wess/wess_driver.py @@ -112,7 +112,15 @@ def prepare_new_iteration(self): with self.data_manager.lock: wess_global_group = self.data_manager.we_h5file.require_group('wess') + reweighting_history_dataset = wess_global_group.require_dataset( + 'reweighting_history', (1,), maxshape=(None,), dtype=int + ) last_reweighting = int(wess_global_group.attrs.get('last_reweighting', 0)) + if last_reweighting > n_iter: + last_reweighting = n_iter - 1 + reweighting_history = reweighting_history_dataset[:] + reweighting_history = reweighting_history[reweighting_history < n_iter] + reweighting_history_dataset.resize((reweighting_history.size), axis=0) if n_iter - last_reweighting < self.reweight_period: # Not time to reweight yet @@ -197,6 +205,8 @@ def prepare_new_iteration(self): if len(bin): bin.reweight(newprob) + reweighting_history_dataset.resize((reweighting_history_dataset.shape[0] + 1), axis=0) + reweighting_history_dataset[-1] = n_iter wess_global_group.attrs['last_reweighting'] = n_iter assert ( From 33b858190d952c4ad3f052d7771fe46d9c2c5805 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:31:27 -0400 Subject: [PATCH 10/14] Bump pypa/cibuildwheel from 2.21.2 to 2.21.3 (#455) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.21.2 to 2.21.3. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.21.2...v2.21.3) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0447b49db..9be2acd4d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -33,7 +33,7 @@ jobs: run: "brew install hdf5" - name: Build wheels - uses: pypa/cibuildwheel@v2.21.2 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_SKIP: "pp* *-musllinux*" CIBW_BUILD: "cp3${{ matrix.python-version }}-*" From 5cfa0382f0cdf3178f786d7f0a288c4081ccea0f Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:32:47 -0400 Subject: [PATCH 11/14] RTD Fix (#454) * pin python 3.12 for rtd * add pytables and mdtraj to rtd env --- doc/doc_env.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/doc_env.yaml b/doc/doc_env.yaml index 345de8423..3f6619fe5 100644 --- a/doc/doc_env.yaml +++ b/doc/doc_env.yaml @@ -10,6 +10,8 @@ dependencies: - h5py - pyyaml - pyzmq + - pytables + - mdtraj - matplotlib-base - blessings - ipykernel From a51772e3b275e6318ce4b5e09b0b565bceaa641a Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Wed, 6 Nov 2024 12:44:50 -0500 Subject: [PATCH 12/14] Update AUTHORS (#458) Co-authored-by: Lillian Chong --- AUTHORS | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/AUTHORS b/AUTHORS index e09a816fb..9285afe2c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -4,18 +4,18 @@ in collaboration with Daniel Zuckerman (zuckermd@ohsu.edu) The original version of WESTPA was written by Matthew Zwier (matthew.zwier@drake.edu) as part of his Ph.D. dissertation with Lillian Chong. -Other contributors are the following (In alphabetical order) +Other core contributors are the following (in alphabetical order) Joshua Adelman -Alex DeGrave -Page Harrison -Joseph Kaus -Patrick Pisciuneri -A. J. Pratt -Nicholas Rego +Anthony Bogetti +Jeremy Leung +AJ Pratt +John Russo Ali Saglam -David Wang +Jeff Thompson Kim Wong +Darian Yang +She Zhang The work manager interface is derived from the ``concurrent.futures`` module of Python 3.2 by Brian Quinlan, (C) 2011 the Python Software Foundation. From 991ea94614e7c0815de3eeb965776fa3bfe0c224 Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:43:08 -0500 Subject: [PATCH 13/14] Fix w_init when using hdf5 framework with processes work manager (#457) * copy the bstates completely during pcoord calculation, required for processes work manager + hdf5 framework * update docstrings for get_bstate_pcoord --- src/westpa/core/sim_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/westpa/core/sim_manager.py b/src/westpa/core/sim_manager.py index 8afeccba6..f45a17c7e 100644 --- a/src/westpa/core/sim_manager.py +++ b/src/westpa/core/sim_manager.py @@ -229,13 +229,15 @@ def report_bin_statistics(self, bins, target_states, save_summary=False): def get_bstate_pcoords(self, basis_states, label='basis'): '''For each of the given ``basis_states``, calculate progress coordinate values - as necessary. The HDF5 file is not updated.''' + as necessary. The HDF5 file is not updated. 
The BasisState objects are explicitly + copied from the futures in order to retain auxdata/restart files (under BasisState.data) + from certain work managers (e.g., the ``processes`` work manager.)''' self.rc.pstatus('Calculating progress coordinate values for {} states.'.format(label)) futures = [self.work_manager.submit(wm_ops.get_pcoord, args=(basis_state,)) for basis_state in basis_states] fmap = {future: i for (i, future) in enumerate(futures)} for future in self.work_manager.as_completed(futures): - basis_states[fmap[future]].pcoord = future.get_result().pcoord + basis_states[fmap[future]] = future.get_result() def report_basis_states(self, basis_states, label='basis'): pstatus = self.rc.pstatus From b7bf03d8d2455f95de151fc548ad6ea406327db7 Mon Sep 17 00:00:00 2001 From: "Jeremy M. G. Leung" <63817169+jeremyleung521@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:43:38 -0500 Subject: [PATCH 14/14] GH Branches Automated Mirroring (#459) * automated mirroring and updates of tags after publishing * add GH TOKEN --- .github/workflows/build.yaml | 18 ++++++++++++++++++ .github/workflows/mirror.yaml | 22 ++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 .github/workflows/mirror.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9be2acd4d..304ef8691 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -153,3 +153,21 @@ jobs: - uses: pypa/gh-action-pypi-publish@release/v1 # To test: #repository_url: https://test.pypi.org/legacy/ + + update-tags: + name: Update develop branch tags + needs: [upload_pypi] + runs-on: ubuntu-latest + # Assuming that since it has sucessfully uploaded to PyPI, this release is safe. + # We will now automatically update the tags for develop. + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: pulling tags + run: | + git checkout develop + git pull --tags origin westpa2 + git push origin develop --tags + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/mirror.yaml b/.github/workflows/mirror.yaml new file mode 100644 index 000000000..a3b06bb9c --- /dev/null +++ b/.github/workflows/mirror.yaml @@ -0,0 +1,22 @@ +name: Mirror develop to westpa2 + +on: + schedule: + # Once a month on the 28th day at 4 pm UTC, which is ~11 am EST. + - cron: "00 16 28 * *" + +jobs: + mirror-develop-to-westpa2: + runs-on: ubuntu-latest + name: Mirror develop branch to westpa2 branch + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: mirroring step + run: | + git checkout westpa2 + git pull --ff-only origin develop + git push origin westpa2 + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}