From edc42296d4be5bdc7500e1bfa58842633c9a5350 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 11:05:12 -0600 Subject: [PATCH 01/12] added multi file h5 exclusion functionality to handler with tests --- reV/handlers/exclusions.py | 39 +++++++++++++--- reV/supply_curve/exclusions.py | 16 +++---- reV/utilities/exceptions.py | 6 +++ tests/test_handlers_exclusions.py | 74 +++++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 13 deletions(-) diff --git a/reV/handlers/exclusions.py b/reV/handlers/exclusions.py index 5f11b33ac..9bacbbd40 100644 --- a/reV/handlers/exclusions.py +++ b/reV/handlers/exclusions.py @@ -6,10 +6,11 @@ import json import numpy as np -from reV.utilities.exceptions import HandlerKeyError +from reV.utilities.exceptions import HandlerKeyError, MultiFileExclusionError from rex.utilities.parse_keys import parse_keys from rex.resource import Resource +from rex.multi_file_resource import MultiFileResource logger = logging.getLogger(__name__) @@ -22,14 +23,26 @@ def __init__(self, h5_file, hsds=False): """ Parameters ---------- - h5_file : str - .h5 file containing exclusion layers and techmap + h5_file : str | list | tuple + .h5 file containing exclusion layers and techmap, + or a list of h5 files hsds : bool Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS behind HSDS """ + self.h5_file = h5_file - self._h5 = Resource(h5_file, hsds=hsds) + + if isinstance(h5_file, str): + self._h5 = Resource(h5_file, hsds=hsds) + elif isinstance(h5_file, (list, tuple)): + self._h5 = MultiFileResource(h5_file, check_files=False) + self._preflight_multi_file() + else: + msg = ('Expected str, list, or tuple for h5_file input but ' + 'received {}'.format(type(h5_file))) + logger.error(msg) + raise TypeError(msg) self._iarr = None @@ -65,6 +78,22 @@ def __getitem__(self, keys): def __contains__(self, layer): return layer in self.layers + def _preflight_multi_file(self): + """Run simple multi-file exclusion checks.""" + lat_shape = self.h5.shapes['latitude'] + lon_shape = self.h5.shapes['longitude'] + for layer in self.layers: + lshape = self.h5.shapes[layer] + lshape = lshape[1:] if len(lshape) > 2 else lshape + if lshape != lon_shape or lshape != lat_shape: + msg = ('Shape of layer "{}" is {} which does not match ' + 'latitude and longitude shapes of {} and {}. ' + 'Check your exclusion file inputs: {}' + .format(layer, self.h5.shapes[layer], + lat_shape, lon_shape, self.h5._h5_files)) + logger.error(msg) + raise MultiFileExclusionError(msg) + def close(self): """ Close h5 instance @@ -78,7 +107,7 @@ def h5(self): Returns ------- - h5 : rex.Resource + h5 : rex.MultiFileResource | rex.Resource """ return self._h5 diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 2ca32fae2..5145b384d 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -396,8 +396,8 @@ def __init__(self, excl_h5, layers=None, min_area=None, """ Parameters ---------- - excl_h5 : str - Path to exclusions .h5 file + excl_h5 : str | list | tuple + Path to one or more exclusions .h5 files layers : list | NoneType list of LayerMask instances for each exclusion layer to combine min_area : float | NoneType @@ -824,8 +824,8 @@ def run(cls, excl_h5, layers=None, min_area=None, Parameters ---------- - excl_h5 : str - Path to exclusions .h5 file + excl_h5 : str | list | tuple + Path to one or more exclusions .h5 files layers : list | NoneType list of LayerMask instances for each exclusion layer to combine min_area : float | NoneType @@ -857,8 +857,8 @@ def __init__(self, excl_h5, layers_dict=None, min_area=None, """ Parameters ---------- - excl_h5 : str - Path to exclusions .h5 file + excl_h5 : str | list | tuple + Path to one or more exclusions .h5 files layers_dict : dict | NoneType Dictionary of LayerMask arugments {layer: {kwarg: value}} min_area : float | NoneType @@ -890,8 +890,8 @@ def run(cls, excl_h5, layers_dict=None, min_area=None, Parameters ---------- - excl_h5 : str - Path to exclusions .h5 file + excl_h5 : str | list | tuple + Path to one or more exclusions .h5 files layers_dict : dict | NoneType Dictionary of LayerMask arugments {layer: {kwarg: value}} min_area : float | NoneType diff --git a/reV/utilities/exceptions.py b/reV/utilities/exceptions.py index 91cff8811..c483966f6 100644 --- a/reV/utilities/exceptions.py +++ b/reV/utilities/exceptions.py @@ -58,6 +58,12 @@ class HandlerValueError(Exception): """ +class MultiFileExclusionError(Exception): + """ + Error for bad multi file exclusion inputs. + """ + + class CollectionValueError(HandlerValueError): """ ValueError for collection handler. diff --git a/tests/test_handlers_exclusions.py b/tests/test_handlers_exclusions.py index b7b740a88..bf986898b 100644 --- a/tests/test_handlers_exclusions.py +++ b/tests/test_handlers_exclusions.py @@ -9,6 +9,10 @@ import pandas as pd from pandas.testing import assert_frame_equal import pytest +import tempfile +import shutil + +from reV.utilities.exceptions import MultiFileExclusionError from reV import TESTDATADIR from reV.handlers.exclusions import ExclusionLayers @@ -123,6 +127,76 @@ def test_shape(): assert np.allclose(truth, test) +@pytest.mark.parametrize(('layer', 'ds_slice'), [ + ('excl_test', None), + ('excl_test', (100, 100)), + ('excl_test', (slice(10, 100), slice(40, 50))), + ('ri_padus', (slice(None, 100), slice(None, 100))), + ('ri_srtm_slope', (100, 100)), + ('ri_srtm_slope', (slice(None, 100), slice(None, 100)))]) +def test_multi_h5(layer, ds_slice): + """Test the exclusion handler with multiple source files""" + excl_h5 = os.path.join(TESTDATADIR, 'ri_exclusions', 'ri_exclusions.h5') + with tempfile.TemporaryDirectory() as td: + excl_temp_1 = os.path.join(td, 'excl1.h5') + excl_temp_2 = os.path.join(td, 'excl2.h5') + shutil.copy(excl_h5, excl_temp_1) + shutil.copy(excl_h5, excl_temp_2) + + with h5py.File(excl_temp_1, 'a') as f: + shape = f['ri_srtm_slope'].shape + attrs = dict(f['ri_srtm_slope'].attrs) + test_dset = 'excl_test' + data = np.ones(shape) * 0.5 + f.create_dataset(test_dset, shape, data=data) + for k, v in attrs.items(): + f[test_dset].attrs[k] = v + + # make sure ri_srtm_slope can be pulled from the other file + del f['ri_srtm_slope'] + + fp_temp = excl_temp_1 + if layer == 'ri_srtm_slope': + fp_temp = excl_temp_2 + + with h5py.File(fp_temp, mode='r') as f: + truth = f[layer][0] + if ds_slice is not None: + truth = truth[ds_slice] + + with ExclusionLayers([excl_temp_1, excl_temp_2]) as f: + if ds_slice is None: + test = f[layer] + else: + keys = (layer,) + ds_slice + test = f[keys] + + assert np.allclose(truth, test) + + +def test_bad_multi_h5(): + """Test the exclusion handler with multiple source files and a poorly + shaped dataset""" + excl_h5 = os.path.join(TESTDATADIR, 'ri_exclusions', 'ri_exclusions.h5') + with tempfile.TemporaryDirectory() as td: + excl_temp_1 = os.path.join(td, 'excl1.h5') + excl_temp_2 = os.path.join(td, 'excl2.h5') + shutil.copy(excl_h5, excl_temp_1) + shutil.copy(excl_h5, excl_temp_2) + + with h5py.File(excl_temp_2, 'a') as f: + bad_dset = 'excl_test_bad' + bad_shape = (1, 100, 300) + attrs = dict(f['ri_srtm_slope'].attrs) + data = np.ones(bad_shape) + f.create_dataset(bad_dset, bad_shape, data=data) + for k, v in attrs.items(): + f[bad_dset].attrs[k] = v + + with pytest.raises(MultiFileExclusionError): + ExclusionLayers([excl_temp_1, excl_temp_2]) + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From f5a43d93150e36be0586a8971ea472da6297115e Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 11:33:01 -0600 Subject: [PATCH 02/12] added multi file exclusion input to aggregation modules --- reV/supply_curve/aggregation.py | 50 ++++++++++++--------- reV/supply_curve/points.py | 16 +++---- reV/supply_curve/sc_aggregation.py | 30 ++++++++----- tests/test_supply_curve_sc_aggregation.py | 53 ++++++++++++++++++++--- 4 files changed, 104 insertions(+), 45 deletions(-) diff --git a/reV/supply_curve/aggregation.py b/reV/supply_curve/aggregation.py index 398cd9916..e2812694d 100644 --- a/reV/supply_curve/aggregation.py +++ b/reV/supply_curve/aggregation.py @@ -33,8 +33,9 @@ def __init__(self, excl_fpath, excl_dict=None, area_filter_kernel='queen', """ Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). excl_dict : dict, optional Dictionary of exclusion LayerMask arugments {layer: {kwarg: value}} by default None @@ -93,8 +94,9 @@ def __init__(self, excl_fpath, h5_fpath, excl_dict=None, """ Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). h5_fpath : str Filepath to .h5 file to be aggregated excl_dict : dict, optional @@ -141,8 +143,9 @@ def __init__(self, excl_fpath, tm_dset, excl_dict=None, """ Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). tm_dset : str Dataset name in the techmap file containing the exclusions-to-resource mapping data. @@ -216,8 +219,9 @@ def _get_excl_area(excl_fpath, excl_area=None): Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). excl_area : float, optional Area of an exclusion pixel in km2. None will try to infer the area from the profile transform attribute in excl_fpath, by default None @@ -250,8 +254,9 @@ def _extract_inclusion_mask(excl_fpath, tm_dset, excl_dict=None, Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). tm_dset : str Dataset name in the techmap file containing the exclusions-to-resource mapping data. @@ -428,8 +433,9 @@ def run_serial(cls, sc_point_method, excl_fpath, tm_dset, ---------- sc_point_method : method Supply Curve Point Method to operate on a single SC point. - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). tm_dset : str Dataset name in the exclusions file containing the exclusions-to-resource mapping data. @@ -658,8 +664,9 @@ def run(cls, excl_fpath, tm_dset, sc_point_method, excl_dict=None, Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). tm_dset : str Dataset name in the techmap file containing the exclusions-to-resource mapping data. @@ -728,8 +735,9 @@ def __init__(self, excl_fpath, h5_fpath, tm_dset, *agg_dset, """ Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). h5_fpath : str Filepath to .h5 file to aggregate tm_dset : str @@ -813,8 +821,9 @@ def run_serial(cls, excl_fpath, h5_fpath, tm_dset, *agg_dset, Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). h5_fpath : str Filepath to .h5 file to aggregate tm_dset : str @@ -1116,8 +1125,9 @@ def run(cls, excl_fpath, h5_fpath, tm_dset, *agg_dset, Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). h5_fpath : str Filepath to .h5 file to aggregate tm_dset : str diff --git a/reV/supply_curve/points.py b/reV/supply_curve/points.py index 4c1f431dc..9ef1d12e2 100644 --- a/reV/supply_curve/points.py +++ b/reV/supply_curve/points.py @@ -164,8 +164,8 @@ def __init__(self, gid, excl, tm_dset, excl_dict=None, inclusion_mask=None, ---------- gid : int gid for supply curve point to analyze. - excl : str | ExclusionMask - Filepath to exclusions h5 or ExclusionMask file handler. + excl : str | list | tuple | ExclusionMask + Filepath(s) to exclusions h5 or ExclusionMask file handler. tm_dset : str Dataset name in the exclusions file containing the exclusions-to-resource mapping data. @@ -229,14 +229,14 @@ def _parse_excl_file(excl): Returns ------- - excl_fpath : str - Filepath for exclusions file + excl_fpath : str | list | tuple + Filepath(s) for exclusions file exclusions : ExclusionMask | None Exclusions mask if input is already an open handler or None if it is to be lazy instantiated. """ - if isinstance(excl, str): + if isinstance(excl, (str, list, tuple)): excl_fpath = excl exclusions = None elif isinstance(excl, ExclusionMask): @@ -1208,8 +1208,8 @@ def __init__(self, f_excl, resolution=64): """ Parameters ---------- - f_excl : str | ExclusionLayers - File path to the exclusions grid, or pre-initialized + f_excl : str | list | tuple | ExclusionLayers + File path(s) to the exclusions grid, or pre-initialized ExclusionLayers. The exclusions dictate the SC analysis extent. resolution : int Number of exclusion points per SC point along an axis. @@ -1225,7 +1225,7 @@ def __init__(self, f_excl, resolution=64): 'an integer but received: {}' .format(type(resolution))) - if isinstance(f_excl, str): + if isinstance(f_excl, (str, list, tuple)): self._excl_fpath = f_excl self._excls = ExclusionLayers(f_excl) elif isinstance(f_excl, ExclusionLayers): diff --git a/reV/supply_curve/sc_aggregation.py b/reV/supply_curve/sc_aggregation.py index ef7486c1d..91ac5dfe4 100644 --- a/reV/supply_curve/sc_aggregation.py +++ b/reV/supply_curve/sc_aggregation.py @@ -53,8 +53,9 @@ def __init__(self, excl_fpath, gen_fpath, econ_fpath=None, """ Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). gen_fpath : str Filepath to .h5 reV generation output results. econ_fpath : str | None @@ -414,8 +415,9 @@ def _nearest_sc_points(excl_fpath, resolution, lat_lons): Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). resolution : int SC resolution lat_lons : ndarray @@ -450,8 +452,9 @@ def run(cls, summary, handler, excl_fpath, res_data, res_class_bins, List of dictionaries, each being an onshore SC point summary. handler : SupplyCurveAggFileHandler Instantiated SupplyCurveAggFileHandler. - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). res_data : np.ndarray | None Extracted resource data from res_class_dset res_class_bins : list @@ -690,8 +693,9 @@ def __init__(self, excl_fpath, gen_fpath, tm_dset, econ_fpath=None, """ Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). gen_fpath : str Filepath to .h5 reV generation output results. tm_dset : str @@ -980,8 +984,9 @@ def run_serial(cls, excl_fpath, gen_fpath, tm_dset, gen_index, Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). gen_fpath : str Filepath to .h5 reV generation output results. tm_dset : str @@ -1445,8 +1450,9 @@ def summary(cls, excl_fpath, gen_fpath, tm_dset, econ_fpath=None, Parameters ---------- - excl_fpath : str - Filepath to exclusions h5 with techmap dataset. + excl_fpath : str | list | tuple + Filepath to exclusions h5 with techmap dataset + (can be one or more filepaths). gen_fpath : str Filepath to .h5 reV generation output results. tm_dset : str diff --git a/tests/test_supply_curve_sc_aggregation.py b/tests/test_supply_curve_sc_aggregation.py index 796125eb8..81f8d1f81 100644 --- a/tests/test_supply_curve_sc_aggregation.py +++ b/tests/test_supply_curve_sc_aggregation.py @@ -11,6 +11,9 @@ import pandas as pd from pandas.testing import assert_frame_equal import pytest +import tempfile +import shutil +import h5py from reV.supply_curve.sc_aggregation import SupplyCurveAggregation from reV import TESTDATADIR @@ -38,7 +41,7 @@ RTOL = 0.001 -def test_aggregation_extent(resolution=64): +def test_agg_extent(resolution=64): """Get the SC points aggregation summary and test that there are expected columns and that all resource gids were found""" @@ -82,7 +85,7 @@ def test_parallel_agg(resolution=64): assert all(summary_serial == summary_parallel) -def test_aggregation_summary(): +def test_agg_summary(): """Test the aggregation summary method against a baseline file.""" s = SupplyCurveAggregation.summary(EXCL, GEN, TM_DSET, @@ -106,6 +109,46 @@ def test_aggregation_summary(): assert_frame_equal(s, s_baseline, check_dtype=False, rtol=0.0001) +def test_multi_file_excl(): + """Test sc aggregation with multple exclusion file inputs.""" + + excl_dict = {'ri_srtm_slope': {'inclusion_range': (None, 5), + 'exclude_nodata': True}, + 'ri_padus': {'exclude_values': [1], + 'exclude_nodata': True}, + 'excl_test': {'include_values': [1], + 'weight': 0.5}, + } + + with tempfile.TemporaryDirectory() as td: + excl_temp_1 = os.path.join(td, 'excl1.h5') + excl_temp_2 = os.path.join(td, 'excl2.h5') + excl_files = (excl_temp_1, excl_temp_2) + shutil.copy(EXCL, excl_temp_1) + shutil.copy(EXCL, excl_temp_2) + + with h5py.File(excl_temp_1, 'a') as f: + shape = f['latitude'].shape + attrs = dict(f['ri_srtm_slope'].attrs) + data = np.ones(shape) + test_dset = 'excl_test' + f.create_dataset(test_dset, shape, data=data) + for k, v in attrs.items(): + f[test_dset].attrs[k] = v + del f['ri_srtm_slope'] + + summary = SupplyCurveAggregation.summary((excl_temp_1, excl_temp_2), + GEN, TM_DSET, + excl_dict=excl_dict, + res_class_dset=RES_CLASS_DSET, + res_class_bins=RES_CLASS_BINS, + ) + + s_baseline = pd.read_csv(AGG_BASELINE, index_col=0) + + assert np.allclose(summary['area_sq_km'] * 2, s_baseline['area_sq_km']) + + @pytest.mark.parametrize('pre_extract', (True, False)) def test_pre_extract_inclusions(pre_extract): """Test the aggregation summary w/ and w/out pre-extracting inclusions""" @@ -132,7 +175,7 @@ def test_pre_extract_inclusions(pre_extract): assert_frame_equal(s, s_baseline, check_dtype=False, rtol=0.0001) -def test_aggregation_gen_econ(): +def test_agg_gen_econ(): """Test the aggregation summary method with separate gen and econ input files.""" @@ -152,7 +195,7 @@ def test_aggregation_gen_econ(): assert_frame_equal(s1, s2) -def test_aggregation_extra_dsets(): +def test_agg_extra_dsets(): """Test aggregation with extra datasets to aggregate.""" h5_dsets = ['lcoe_fcr-2012', 'lcoe_fcr-2013', 'lcoe_fcr-stdev'] s = SupplyCurveAggregation.summary(EXCL, ONLY_GEN, TM_DSET, @@ -176,7 +219,7 @@ def test_aggregation_extra_dsets(): assert np.allclose(avg.values, s['mean_lcoe'].values) -def test_aggregation_scalar_excl(): +def test_agg_scalar_excl(): """Test the aggregation summary with exclusions of 0.5""" gids_subset = list(range(0, 20)) From 2a440c4561b911b8eb15e45d5a662a872964f073 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 11:44:37 -0600 Subject: [PATCH 03/12] allowed for excl_fpath input to sc agg cli to be str or list --- reV/config/supply_curve_configs.py | 12 ++---------- reV/supply_curve/cli_sc_aggregation.py | 7 ++++--- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/reV/config/supply_curve_configs.py b/reV/config/supply_curve_configs.py index 372fc98ed..b7efa29bd 100644 --- a/reV/config/supply_curve_configs.py +++ b/reV/config/supply_curve_configs.py @@ -54,16 +54,8 @@ def _sc_agg_preflight(self): @property def excl_fpath(self): - """Get the exclusions filepath""" - - fpath = self['excl_fpath'] - - if fpath == 'PIPELINE': - fpath = Pipeline.parse_previous( - self.dirout, 'aggregation', target='fpath', - target_module='exclusions')[0] - - return fpath + """Get the exclusions filepath(s)""" + return self['excl_fpath'] @property def gen_fpath(self): diff --git a/reV/supply_curve/cli_sc_aggregation.py b/reV/supply_curve/cli_sc_aggregation.py index e6875166c..71018ed6f 100644 --- a/reV/supply_curve/cli_sc_aggregation.py +++ b/reV/supply_curve/cli_sc_aggregation.py @@ -18,7 +18,7 @@ from rex.utilities.hpc import SLURM from rex.utilities.cli_dtypes import (STR, INT, FLOAT, STRLIST, FLOATLIST, - STRFLOAT) + STRFLOAT, STR_OR_LIST) from rex.utilities.loggers import init_mult from rex.utilities.utilities import dict_str_load, get_class_properties @@ -169,8 +169,9 @@ def from_config(ctx, config_file, verbose): @main.group(invoke_without_command=True) -@click.option('--excl_fpath', '-exf', type=STR, required=True, - help='Exclusions file (.h5).') +@click.option('--excl_fpath', '-exf', type=STR_OR_LIST, required=True, + help='Single exclusions file (.h5) or a ' + 'list of exclusion files (.h5, .h5).') @click.option('--gen_fpath', '-gf', type=STR, required=True, help='reV generation/econ output file.') @click.option('--tm_dset', '-tm', type=STR, required=True, From 7da03e576a48b7c9f6fbaa50cf48535938c7de04 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 11:55:13 -0600 Subject: [PATCH 04/12] added pre-check of missing layers to exclusionmask init --- reV/supply_curve/exclusions.py | 41 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 5145b384d..5b22295ed 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -51,7 +51,7 @@ def __init__(self, layer, inclusion_range=(None, None), Nodata value for the layer. If None, it will be infered when LayerMask is added to ExclusionMask """ - self._layer = layer + self._name = layer self._inclusion_range = inclusion_range self._exclude_values = exclude_values self._inclusion_weights = inclusion_weights @@ -77,7 +77,7 @@ def __init__(self, layer, inclusion_range=(None, None), def __repr__(self): msg = ("{} for {} exclusion, of type {}" - .format(self.__class__.__name__, self.layer, self.mask_type)) + .format(self.__class__.__name__, self.name, self.mask_type)) return msg @@ -93,15 +93,15 @@ def __getitem__(self, data): return self._apply_mask(data) @property - def layer(self): + def name(self): """ Layer name to extract from exclusions .h5 file Returns ------- - _layer : str + _name : str """ - return self._layer + return self._name @property def min_value(self): @@ -420,6 +420,14 @@ def __init__(self, excl_h5, layers=None, min_area=None, if not isinstance(layers, list): layers = [layers] + missing = [layer.name for layer in layers + if layer not in self.excl_layers] + if any(missing): + msg = ("ExclusionMask layers {} are missing from: {}" + .format(missing, self._excl_h5)) + logger.error(msg) + raise KeyError(msg) + for layer in layers: self.add_layer(layer) @@ -578,15 +586,14 @@ def add_layer(self, layer, replace=False): layer : LayerMask LayerMask instance to add to set of layers to be combined """ - layer_name = layer.layer - if layer_name not in self.excl_layers: - msg = "{} does not existin in {}".format(layer_name, self._excl_h5) + if layer.name not in self.excl_layers: + msg = "{} does not exist in {}".format(layer.name, self._excl_h5) logger.error(msg) - raise KeyError(layer_name) + raise KeyError(msg) - if layer_name in self.layer_names: - msg = "{} is already in {}".format(layer_name, self) + if layer.name in self.layer_names: + msg = "{} is already in {}".format(layer.name, self) if replace: msg += " replacing existing layer" logger.warning(msg) @@ -595,15 +602,15 @@ def add_layer(self, layer, replace=False): logger.error(msg) raise ExclusionLayerError(msg) - layer.nodata_value = self.excl_h5.get_nodata_value(layer_name) + layer.nodata_value = self.excl_h5.get_nodata_value(layer.name) if self._check_layers: - if not layer[self.excl_h5[layer_name]].any(): + if not layer[self.excl_h5[layer.name]].any(): msg = ("Layer {} does not have any un-excluded pixels!" - .format(layer_name)) + .format(layer.name)) logger.error(msg) raise ExclusionLayerError(msg) - self._layers[layer_name] = layer + self._layers[layer.name] = layer @property def nodata_lookup(self): @@ -749,7 +756,7 @@ def _force_include(self, mask, layers, ds_slice): exclusions mask. """ for layer in layers: - layer_slice = (layer.layer, ) + ds_slice + layer_slice = (layer.name, ) + ds_slice layer_mask = layer[self.excl_h5[layer_slice]] if mask is None: mask = layer_mask @@ -792,7 +799,7 @@ def _generate_mask(self, *ds_slice): logger.debug('Computing exclusions {} for {}' .format(layer, ds_slice)) log_mem(logger, log_level='DEBUG') - layer_slice = (layer.layer, ) + ds_slice + layer_slice = (layer.name, ) + ds_slice layer_mask = layer[self.excl_h5[layer_slice]] if mask is None: mask = layer_mask From b63d28042d1efe13e34b90833ffe116356222b06 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 12:10:50 -0600 Subject: [PATCH 05/12] bug fixes --- reV/config/supply_curve_configs.py | 11 +++++++++-- reV/supply_curve/cli_sc_aggregation.py | 15 +++++++++++++-- reV/supply_curve/exclusions.py | 2 +- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/reV/config/supply_curve_configs.py b/reV/config/supply_curve_configs.py index b7efa29bd..834639e92 100644 --- a/reV/config/supply_curve_configs.py +++ b/reV/config/supply_curve_configs.py @@ -43,8 +43,15 @@ def __init__(self, config): def _sc_agg_preflight(self): """Perform pre-flight checks on the SC agg config inputs""" - with h5py.File(self.excl_fpath, mode='r') as f: - dsets = list(f) + dsets = [] + paths = self.excl_fpath + + if isinstance(self.excl_fpath, str): + paths = [self.excl_fpath] + + for fp in paths: + with h5py.File(fp, mode='r') as f: + dsets += list(f) if self.tm_dset not in dsets and self.res_fpath is None: raise ConfigError('Techmap dataset "{}" not found in exclusions ' diff --git a/reV/supply_curve/cli_sc_aggregation.py b/reV/supply_curve/cli_sc_aggregation.py index 71018ed6f..9770280e8 100644 --- a/reV/supply_curve/cli_sc_aggregation.py +++ b/reV/supply_curve/cli_sc_aggregation.py @@ -321,11 +321,22 @@ def direct(ctx, excl_fpath, gen_fpath, tm_dset, econ_fpath, res_fpath, init_mult(name, log_dir, modules=[__name__, 'reV', 'rex'], verbose=verbose) - with h5py.File(excl_fpath, mode='r') as f: - dsets = list(f) + dsets = [] + paths = excl_fpath + if isinstance(excl_fpath, str): + paths = [excl_fpath] + for fp in paths: + with h5py.File(fp, mode='r') as f: + dsets += list(f) if tm_dset in dsets: logger.info('Found techmap "{}".'.format(tm_dset)) + elif tm_dset not in dsets and not isinstance(excl_fpath, str): + msg = ('Could not find techmap dataset "{}" and cannot run ' + 'techmap with arbitrary multiple exclusion filepaths ' + 'to write to: {}'.format(tm_dset, excl_fpath)) + logger.error(msg) + raise RuntimeError(msg) else: logger.info('Could not find techmap "{}". Running techmap module.' .format(tm_dset)) diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 5b22295ed..5caae1c90 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -421,7 +421,7 @@ def __init__(self, excl_h5, layers=None, min_area=None, layers = [layers] missing = [layer.name for layer in layers - if layer not in self.excl_layers] + if layer.name not in self.excl_layers] if any(missing): msg = ("ExclusionMask layers {} are missing from: {}" .format(missing, self._excl_h5)) From 272649f15507a44867946e3607d78a680ef455bc Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 12:13:43 -0600 Subject: [PATCH 06/12] incremented rex requirement for str_or_list cli dtype import --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0cf9789ba..cc47b325b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ NREL-PySAM==2.1.4 -NREL-rex>=0.2.51 +NREL-rex>=0.2.52 packaging>=20.3 plotly>=4.7.1 plotting>=0.0.6 From c648856129099e7d387a1c94faa86740497d8b41 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 13:20:54 -0600 Subject: [PATCH 07/12] new preflight check for multi file exclusion crs --- reV/handlers/exclusions.py | 26 ++++++++++++++++++++++++++ tests/test_handlers_exclusions.py | 24 +++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/reV/handlers/exclusions.py b/reV/handlers/exclusions.py index 9bacbbd40..eb66b2e65 100644 --- a/reV/handlers/exclusions.py +++ b/reV/handlers/exclusions.py @@ -94,6 +94,32 @@ def _preflight_multi_file(self): logger.error(msg) raise MultiFileExclusionError(msg) + check_attrs = ('height', 'width', 'crs', 'transform') + base_profile = {} + for fp in self.h5_file: + with ExclusionLayers(fp) as f: + if not base_profile: + base_profile = f.profile + else: + for attr in check_attrs: + if attr not in base_profile or attr not in f.profile: + msg = ('Multi-file exclusion inputs from {} ' + 'dont have profiles with height, width, ' + 'crs, and transform: {} and {}' + .format(self.h5_file, base_profile, + f.profile)) + logger.error(msg) + raise MultiFileExclusionError(msg) + + if base_profile[attr] != f.profile[attr]: + msg = ('Multi-file exclusion inputs from {} ' + 'dont have matching "{}": {} and {}' + .format(self.h5_file, attr, + base_profile[attr], + f.profile[attr])) + logger.error(msg) + raise MultiFileExclusionError(msg) + def close(self): """ Close h5 instance diff --git a/tests/test_handlers_exclusions.py b/tests/test_handlers_exclusions.py index bf986898b..5193f437d 100644 --- a/tests/test_handlers_exclusions.py +++ b/tests/test_handlers_exclusions.py @@ -174,7 +174,7 @@ def test_multi_h5(layer, ds_slice): assert np.allclose(truth, test) -def test_bad_multi_h5(): +def test_multi_h5_bad_shape(): """Test the exclusion handler with multiple source files and a poorly shaped dataset""" excl_h5 = os.path.join(TESTDATADIR, 'ri_exclusions', 'ri_exclusions.h5') @@ -197,6 +197,28 @@ def test_bad_multi_h5(): ExclusionLayers([excl_temp_1, excl_temp_2]) +def test_multi_h5_bad_crs(): + """Test the exclusion handler with multiple source files and one file + with a bad crs attribute""" + excl_h5 = os.path.join(TESTDATADIR, 'ri_exclusions', 'ri_exclusions.h5') + with tempfile.TemporaryDirectory() as td: + excl_temp_1 = os.path.join(td, 'excl1.h5') + excl_temp_2 = os.path.join(td, 'excl2.h5') + shutil.copy(excl_h5, excl_temp_1) + shutil.copy(excl_h5, excl_temp_2) + + with h5py.File(excl_temp_2, 'a') as f: + attrs = dict(f.attrs) + attrs['profile'] = json.loads(attrs['profile']) + attrs['profile']['crs'] = 'bad_crs' + attrs['profile'] = json.dumps(attrs['profile']) + for k, v in attrs.items(): + f.attrs[k] = v + + with pytest.raises(MultiFileExclusionError): + ExclusionLayers([excl_temp_1, excl_temp_2]) + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From d4d68dd7a9cb353b42d8bf469be8c34bd43929a5 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 13:27:54 -0600 Subject: [PATCH 08/12] incremented version for multi exclusion fpath input feature --- reV/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reV/version.py b/reV/version.py index 0f7f08649..f852e72eb 100644 --- a/reV/version.py +++ b/reV/version.py @@ -2,4 +2,4 @@ reV Version number """ -__version__ = "0.4.54" +__version__ = "0.4.55" From 36d1cda05ca7fd40af67b3e4f540ff924d563e3e Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 13:52:57 -0600 Subject: [PATCH 09/12] edits for PR review and one bug fix on frictionmask handler --- reV/config/supply_curve_configs.py | 10 ++++------ reV/supply_curve/exclusions.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/reV/config/supply_curve_configs.py b/reV/config/supply_curve_configs.py index 834639e92..1b3c6a71c 100644 --- a/reV/config/supply_curve_configs.py +++ b/reV/config/supply_curve_configs.py @@ -6,13 +6,13 @@ @author: gbuster """ -import h5py import os import logging from reV.utilities.exceptions import ConfigError, PipelineError from reV.config.base_analysis_config import AnalysisConfig from reV.pipeline.pipeline import Pipeline +from rex.multi_file_resource import MultiFileResource logger = logging.getLogger(__name__) @@ -43,15 +43,13 @@ def __init__(self, config): def _sc_agg_preflight(self): """Perform pre-flight checks on the SC agg config inputs""" - dsets = [] - paths = self.excl_fpath + paths = self.excl_fpath if isinstance(self.excl_fpath, str): paths = [self.excl_fpath] - for fp in paths: - with h5py.File(fp, mode='r') as f: - dsets += list(f) + with MultiFileResource(paths, check_files=False) as res: + dsets = res.datasets if self.tm_dset not in dsets and self.res_fpath is None: raise ConfigError('Techmap dataset "{}" not found in exclusions ' diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 5caae1c90..28822be61 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -965,7 +965,7 @@ def _generate_mask(self, *ds_slice): if len(ds_slice) == 1 & isinstance(ds_slice[0], tuple): ds_slice = ds_slice[0] - layer_slice = (self._layers[self._fric_dset].layer, ) + ds_slice + layer_slice = (self._layers[self._fric_dset].name, ) + ds_slice mask = self._layers[self._fric_dset][self.excl_h5[layer_slice]] mask[(mask == self._layers[self._fric_dset].nodata_value)] = 1 From efe3fba9c5bb50ec17861b78a42e67ae576c466e Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 14:49:04 -0600 Subject: [PATCH 10/12] turned check layers to false when pre extracting layers because loading the full layers takes forever and youll get an idea of percent inclusion after loading anyway --- reV/supply_curve/aggregation.py | 2 +- reV/supply_curve/exclusions.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/reV/supply_curve/aggregation.py b/reV/supply_curve/aggregation.py index e2812694d..513ed0ef3 100644 --- a/reV/supply_curve/aggregation.py +++ b/reV/supply_curve/aggregation.py @@ -279,7 +279,7 @@ def _extract_inclusion_mask(excl_fpath, tm_dset, excl_dict=None, logger.info('Pre-extracting full exclusion mask, this could take ' 'up to 30min for a large exclusion config...') with ExclusionMaskFromDict(excl_fpath, layers_dict=excl_dict, - check_layers=True, min_area=min_area, + check_layers=False, min_area=min_area, kernel=area_filter_kernel) as f: inclusion_mask = f.mask tm_mask = f._excl_h5[tm_dset] == -1 diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 28822be61..322c694d7 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -783,6 +783,7 @@ def _generate_mask(self, *ds_slice): ("and" operation) such that 1 is included, 0 is excluded, 0.5 is half. """ + mask = None if len(ds_slice) == 1 & isinstance(ds_slice[0], tuple): ds_slice = ds_slice[0] From 602123f2099573c594e7cce35ed0f1c342feb947 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 15:05:47 -0600 Subject: [PATCH 11/12] added a flag to check layers on full mask generation instead of duplicate check on preflight --- reV/supply_curve/aggregation.py | 2 +- reV/supply_curve/exclusions.py | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/reV/supply_curve/aggregation.py b/reV/supply_curve/aggregation.py index 513ed0ef3..20fd85fb1 100644 --- a/reV/supply_curve/aggregation.py +++ b/reV/supply_curve/aggregation.py @@ -281,7 +281,7 @@ def _extract_inclusion_mask(excl_fpath, tm_dset, excl_dict=None, with ExclusionMaskFromDict(excl_fpath, layers_dict=excl_dict, check_layers=False, min_area=min_area, kernel=area_filter_kernel) as f: - inclusion_mask = f.mask + inclusion_mask = f._generate_mask(..., check_layers=True) tm_mask = f._excl_h5[tm_dset] == -1 inclusion_mask[tm_mask] = 0 diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 322c694d7..0260c2f3e 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -605,8 +605,7 @@ def add_layer(self, layer, replace=False): layer.nodata_value = self.excl_h5.get_nodata_value(layer.name) if self._check_layers: if not layer[self.excl_h5[layer.name]].any(): - msg = ("Layer {} does not have any un-excluded pixels!" - .format(layer.name)) + msg = ("Layer {} is fully excluded!".format(layer.name)) logger.error(msg) raise ExclusionLayerError(msg) @@ -765,7 +764,7 @@ def _force_include(self, mask, layers, ds_slice): return mask - def _generate_mask(self, *ds_slice): + def _generate_mask(self, *ds_slice, check_layers=False): """ Generate multiplicative inclusion mask from exclusion layers. @@ -775,6 +774,11 @@ def _generate_mask(self, *ds_slice): What to extract from ds, each arg is for a sequential axis. For example, (slice(0, 64), slice(0, 64)) will extract a 64x64 exclusions mask. + check_layers : bool + Check each layer as each layer is extracted to ensure they contain + un-excluded values. This should only really be True if ds_slice is + for the full inclusion mask. Otherwise, this could raise an error + for a fully excluded mask for just one excluded SC point. Returns ------- @@ -802,6 +806,13 @@ def _generate_mask(self, *ds_slice): log_mem(logger, log_level='DEBUG') layer_slice = (layer.name, ) + ds_slice layer_mask = layer[self.excl_h5[layer_slice]] + + if check_layers and not layer_mask.any(): + msg = ("Layer {} is fully excluded!" + .format(layer.name)) + logger.error(msg) + raise ExclusionLayerError(msg) + if mask is None: mask = layer_mask else: From 1fac5dc8649bf5a28ae36c586cc0a641aac56836 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 29 Mar 2021 17:10:44 -0600 Subject: [PATCH 12/12] broke up generate mask --- reV/supply_curve/exclusions.py | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/reV/supply_curve/exclusions.py b/reV/supply_curve/exclusions.py index 0260c2f3e..45590ef00 100644 --- a/reV/supply_curve/exclusions.py +++ b/reV/supply_curve/exclusions.py @@ -789,11 +789,7 @@ def _generate_mask(self, *ds_slice, check_layers=False): """ mask = None - if len(ds_slice) == 1 & isinstance(ds_slice[0], tuple): - ds_slice = ds_slice[0] - - if self._min_area is not None: - ds_slice, sub_slice = self._increase_mask_slice(ds_slice, n=1) + ds_slice, sub_slice = self._parse_ds_slice(ds_slice) if self.layers: force_include = [] @@ -835,6 +831,36 @@ def _generate_mask(self, *ds_slice, check_layers=False): return mask + def _parse_ds_slice(self, ds_slice): + """Parse a dataset slice to make it the proper dimensions and also + optionally increase the dataset slice to make the contiguous area + filter more accurate + + Parameters + ---------- + ds_slice : int | slice | list | ndarray + What to extract from ds, each arg is for a sequential axis. + For example, (slice(0, 64), slice(0, 64)) will extract a 64x64 + exclusions mask. + + Returns + ------- + ds_slice : tuple + Two entry tuple with x and y slices with increased dimensions. + sub_slice : tuple + Two entry tuple with x and y slices to retrieve the original + slice out of the bigger slice. + """ + + if len(ds_slice) == 1 & isinstance(ds_slice[0], tuple): + ds_slice = ds_slice[0] + + sub_slice = None + if self._min_area is not None: + ds_slice, sub_slice = self._increase_mask_slice(ds_slice, n=1) + + return ds_slice, sub_slice + @classmethod def run(cls, excl_h5, layers=None, min_area=None, kernel='queen', hsds=False):