From ae85ecbcca0111f895942724c72aa31ab61cd868 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 4 Jan 2024 10:45:04 -0700 Subject: [PATCH 1/7] refactor of reV gen and bespoke bias correction methods to use new rex bias correction module --- reV/SAM/SAM.py | 26 +++++--- reV/SAM/generation.py | 26 +++++--- reV/bespoke/bespoke.py | 114 +++++++++++++++++++++++---------- reV/generation/generation.py | 118 +++++++++++++++++++++-------------- tests/test_bespoke.py | 5 +- tests/test_gen_pv.py | 6 +- tests/test_gen_wind.py | 7 ++- 7 files changed, 195 insertions(+), 107 deletions(-) diff --git a/reV/SAM/SAM.py b/reV/SAM/SAM.py index 1c9bc00d5..e8bb9438e 100644 --- a/reV/SAM/SAM.py +++ b/reV/SAM/SAM.py @@ -254,15 +254,23 @@ def get(cls, res_file, project_points, module, res_file to lr_res_file spatial mapping. For details on this argument, see the rex.MultiResolutionResource docstring. bias_correct : None | pd.DataFrame - None if not provided or extracted DataFrame with wind or solar - resource bias correction table. This has columns: gid (can be index - name), adder, scalar. The gid field should match the true resource - gid regardless of the optional gid_map input. If both adder and - scalar are present, the wind or solar resource is corrected by - (res*scalar)+adder. If either adder or scalar is not present, - scalar defaults to 1 and adder to 0. Only windspeed or GHI+DNI are - corrected depending on the technology. GHI and DNI are corrected - with the same correction factors. + Optional DataFrame or CSV filepath to a wind or solar + resource bias correction table. This has columns: + + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. Returns diff --git a/reV/SAM/generation.py b/reV/SAM/generation.py index fde4066d3..e697a755c 100644 --- a/reV/SAM/generation.py +++ b/reV/SAM/generation.py @@ -431,15 +431,23 @@ def reV_run(cls, points_control, res_file, site_df, res_file to lr_res_file spatial mapping. For details on this argument, see the rex.MultiResolutionResource docstring. bias_correct : None | pd.DataFrame - None if not provided or extracted DataFrame with wind or solar - resource bias correction table. This has columns: gid (can be index - name), adder, scalar. The gid field should match the true resource - gid regardless of the optional gid_map input. If both adder and - scalar are present, the wind or solar resource is corrected by - (res*scalar)+adder. If either adder or scalar is not present, - scalar defaults to 1 and adder to 0. Only windspeed or GHI+DNI are - corrected depending on the technology. GHI and DNI are corrected - with the same correction factors. + Optional DataFrame or CSV filepath to a wind or solar + resource bias correction table. This has columns: + + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. Returns ------- diff --git a/reV/bespoke/bespoke.py b/reV/bespoke/bespoke.py index 9897d06db..6067fdf1f 100644 --- a/reV/bespoke/bespoke.py +++ b/reV/bespoke/bespoke.py @@ -2,8 +2,7 @@ """ reV bespoke wind plant analysis tools """ -# TODO update docstring -# TODO check on outputs +from inspect import signature import time import logging import copy @@ -31,6 +30,7 @@ FileInputError) from reV.utilities import log_versions, ModuleName +from rex.utilities.bc_utils import _parse_bc_table from rex.joint_pd.joint_pd import JointPD from rex.renewable_resource import WindResource from rex.multi_year_resource import MultiYearWindResource @@ -325,14 +325,24 @@ def __init__(self, gid, excl, res, tm_dset, sam_sys_inputs, extract from the resource input). This is useful if you're running forecasted resource data (e.g., ECMWF) to complement historical meteorology (e.g., WTK). - bias_correct : str | pd.DataFrame | None - Optional DataFrame or csv filepath to a wind bias correction table. - This has columns: gid (can be index name), adder, scalar. If both - adder and scalar are present, the wind is corrected by - (res*scalar)+adder. If either is not present, scalar defaults to 1 - and adder to 0. Only windspeed is corrected. Note that if gid_map - is provided, the bias_correct gid corresponds to the actual - resource data gid and not the techmap gid. + bias_correct : str | pd.DataFrame, optional + Optional DataFrame or CSV filepath to a wind or solar + resource bias correction table. This has columns: + + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. pre_loaded_data : BespokeSinglePlantData, optional A pre-loaded :class:`BespokeSinglePlantData` object, or ``None``. Can be useful to speed up execution on file @@ -530,10 +540,55 @@ class was initialized with close=False, this will not close any handlers.""" self.sc_point.close() + def bias_correct_ws(self, ws, dset, h5_gids): + """Bias correct windspeed data if the ``bias_correct`` input was + provided. + + Parameters + ---------- + ws : np.ndarray + Windspeed data in shape (time, space) + dset : str + Resource dataset name e.g., "windspeed_100m", "temperature_100m", + "pressure_100m", or something similar + h5_gids : list | np.ndarray + Array of integer gids (spatial indices) from the source h5 file. + This is used to get the correct bias correction parameters from + ``bias_correct`` table based on its ``gid`` column + + Returns + ------- + ws : np.ndarray + Bias corrected windspeed data in same shape as input + """ + + if self._bias_correct is not None and dset.startswith('windspeed_'): + + out = _parse_bc_table(self._bias_correct, h5_gids) + bc_fun, bc_fun_kwargs, bool_bc = out + + logger.debug('Bias correcting windspeed with function {} ' + 'for h5 gids: {}'.format(bc_fun, h5_gids)) + + bc_fun_kwargs['ws'] = ws[:, bool_bc] + sig = signature(bc_fun) + bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() + if k in sig.parameters} + + ws[:, bool_bc] = bc_fun(**bc_fun_kwargs) + + return ws + def get_weighted_res_ts(self, dset): """Special method for calculating the exclusion-weighted mean resource timeseries data for the BespokeSinglePlant. + Parameters + ---------- + dset : str + Resource dataset name e.g., "windspeed_100m", "temperature_100m", + "pressure_100m", or something similar + Returns ------- data : np.ndarray @@ -550,16 +605,7 @@ def get_weighted_res_ts(self, dset): else: data = self._pre_loaded_data[dset, :, h5_gids] - if self._bias_correct is not None and dset.startswith('windspeed_'): - missing = [g for g in h5_gids if g not in self._bias_correct.index] - for missing_gid in missing: - self._bias_correct.loc[missing_gid, 'scalar'] = 1 - self._bias_correct.loc[missing_gid, 'adder'] = 0 - - scalar = self._bias_correct.loc[h5_gids, 'scalar'].values - adder = self._bias_correct.loc[h5_gids, 'adder'].values - data = data * scalar + adder - data = np.maximum(data, 0) + data = self.bias_correct_ws(data, dset, h5_gids) weights = np.zeros(len(gids)) for i, gid in enumerate(gids): @@ -1596,20 +1642,20 @@ def __init__(self, excl_fpath, res_fpath, tm_dset, objective_function, Optional DataFrame or CSV filepath to a wind or solar resource bias correction table. This has columns: - - ``gid``: GID of site (can be index name) - - ``adder``: Value to add to resource at each site - - ``scalar``: Value to scale resource at each site by - - The ``gid`` field should match the true resource ``gid`` - regardless of the optional ``gid_map`` input. If both - ``adder`` and ``scalar`` are present, the wind or solar - resource is corrected by :math:`(res*scalar)+adder`. If - *either* is missing, ``scalar`` defaults to 1 and ``adder`` - to 0. Only `windspeed` **or** `GHI` + `DNI` are corrected, - depending on the technology (wind for the former, solar - for the latter). `GHI` and `DNI` are corrected with the - same correction factors. If ``None``, no corrections are - applied. By default, ``None``. + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. pre_load_data : bool, optional Option to pre-load resource data. This step can be time-consuming up front, but it drastically reduces the diff --git a/reV/generation/generation.py b/reV/generation/generation.py index ba53b4b3e..78424f35a 100644 --- a/reV/generation/generation.py +++ b/reV/generation/generation.py @@ -336,20 +336,20 @@ def __init__(self, technology, project_points, sam_files, resource_file, Optional DataFrame or CSV filepath to a wind or solar resource bias correction table. This has columns: - - ``gid``: GID of site (can be index name) - - ``adder``: Value to add to resource at each site - - ``scalar``: Value to scale resource at each site by - - The ``gid`` field should match the true resource ``gid`` - regardless of the optional ``gid_map`` input. If both - ``adder`` and ``scalar`` are present, the wind or solar - resource is corrected by :math:`(res*scalar)+adder`. If - *either* is missing, ``scalar`` defaults to 1 and - ``adder`` to 0. Only `windspeed` **or** `GHI` + `DNI` are - corrected, depending on the technology (wind for the former, - solar for the latter). `GHI` and `DNI` are corrected with - the same correction factors. If ``None``, no corrections are - applied. By default, ``None``. + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. """ pc = self.get_pc(points=project_points, points_range=None, sam_configs=sam_files, tech=technology, @@ -600,14 +600,23 @@ def _run_single_worker(cls, points_control, tech=None, res_file=None, res_file to lr_res_file spatial mapping. For details on this argument, see the rex.MultiResolutionResource docstring. bias_correct : None | pd.DataFrame - None if not provided or extracted DataFrame with wind or solar - resource bias correction table. This has columns: gid (can be index - name), adder, scalar. If both adder and scalar are present, the - wind or solar resource is corrected by (res*scalar)+adder. If - either adder or scalar is not present, scalar defaults to 1 and - adder to 0. Only windspeed or GHI+DNI are corrected depending on - the technology. GHI and DNI are corrected with the same correction - factors. + Optional DataFrame or CSV filepath to a wind or solar + resource bias correction table. This has columns: + + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. Returns ------- @@ -766,26 +775,45 @@ def _parse_bc(bias_correct): Parameters ---------- - bias_correct : str | pd.DataFrame | None - Optional DataFrame or csv filepath to a wind or solar resource bias - correction table. This has columns: gid (can be index name), adder, - scalar. If both adder and scalar are present, the wind or solar - resource is corrected by (res*scalar)+adder. If either is not - present, scalar defaults to 1 and adder to 0. Only windspeed or - GHI+DNI are corrected depending on the technology. GHI and DNI are - corrected with the same correction factors. + bias_correct : str | pd.DataFrame, optional + Optional DataFrame or CSV filepath to a wind or solar + resource bias correction table. This has columns: + + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. Returns ------- bias_correct : None | pd.DataFrame - None if not provided or extracted DataFrame with wind or solar - resource bias correction table. This has columns: gid (can be index - name), adder, scalar. If both adder and scalar are present, the - wind or solar resource is corrected by (res*scalar)+adder. If - either adder or scalar is not present, scalar defaults to 1 and - adder to 0. Only windspeed or GHI+DNI are corrected depending on - the technology. GHI and DNI are corrected with the same correction - factors. + Optional DataFrame or CSV filepath to a wind or solar + resource bias correction table. This has columns: + + - ``gid``: GID of site (can be index name of dataframe) + - ``method``: function name from ``rex.bias_correction`` module + + The ``gid`` field should match the true resource ``gid`` regardless + of the optional ``gid_map`` input. Only ``windspeed`` **or** + ``GHI`` + ``DNI`` + ``DHI`` are corrected, depending on the + technology (wind for the former, PV or CSP for the latter). See the + functions in the ``rex.bias_correction`` module for available + inputs for ``method``. Any additional kwargs required for the + requested ``method`` can be input as additional columns in the + ``bias_correct`` table e.g., for linear bias correction functions + you can include ``scalar`` and ``adder`` inputs as columns in the + ``bias_correct`` table on a site-by-site basis. If ``None``, no + corrections are applied. By default, ``None``. """ if isinstance(bias_correct, type(None)): @@ -798,16 +826,6 @@ def _parse_bc(bias_correct): 'but received: {}'.format(type(bias_correct))) assert isinstance(bias_correct, pd.DataFrame), msg - if 'adder' not in bias_correct: - logger.info('Bias correction table provided, but "adder" not ' - 'found, defaulting to 0.') - bias_correct['adder'] = 0 - - if 'scalar' not in bias_correct: - logger.info('Bias correction table provided, but "scalar" not ' - 'found, defaulting to 1.') - bias_correct['scalar'] = 1 - msg = ('Bias correction table must have "gid" column but only found: ' '{}'.format(list(bias_correct.columns))) assert 'gid' in bias_correct or bias_correct.index.name == 'gid', msg @@ -815,6 +833,10 @@ def _parse_bc(bias_correct): if bias_correct.index.name != 'gid': bias_correct = bias_correct.set_index('gid') + msg = ('Bias correction table must have "method" column but only ' + 'found: {}'.format(list(bias_correct.columns))) + assert 'method' in bias_correct, msg + return bias_correct def _parse_output_request(self, req): diff --git a/tests/test_bespoke.py b/tests/test_bespoke.py index 11b971bc8..408f36994 100644 --- a/tests/test_bespoke.py +++ b/tests/test_bespoke.py @@ -911,7 +911,7 @@ def test_bespoke_aep_is_zero_if_no_turbines_placed(): assert aep == 0 -def test_bespoke_w_prior_run(): +def test_bespoke_prior_run(): """Test a follow-on bespoke timeseries generation run based on a prior plant layout optimization.""" output_request = ('system_capacity', 'cf_mean', 'cf_profile', @@ -1064,7 +1064,7 @@ def test_gid_map(): assert np.allclose(f1["ws_mean"], f2["ws_mean"]) -def test_bespoke_w_bias_correct(): +def test_bespoke_bias_correct(): """Test bespoke run with bias correction on windspeed data.""" output_request = ('system_capacity', 'cf_mean', 'cf_profile', 'extra_unused_data', 'ws_mean') @@ -1085,6 +1085,7 @@ def test_bespoke_w_bias_correct(): # intentionally leaving out WTK gid 13 which only has 5 included 90m # pixels in order to check that this is dynamically patched. bias_correct = pd.DataFrame({'gid': [3, 4, 12, 11, 10, 9]}) + bias_correct['method'] = 'lin_ws' bias_correct['scalar'] = 0.5 fp_bc = os.path.join(td, 'bc.csv') bias_correct.to_csv(fp_bc) diff --git a/tests/test_gen_pv.py b/tests/test_gen_pv.py index 67a710f27..468ce2241 100644 --- a/tests/test_gen_pv.py +++ b/tests/test_gen_pv.py @@ -592,7 +592,8 @@ def test_irrad_bias_correct(): sites_per_worker=1, output_request=output_request) gen_base.run(max_workers=1) - bc_df = pd.DataFrame({'gid': np.arange(100), 'scalar': 1, 'adder': 50}) + bc_df = pd.DataFrame({'gid': np.arange(100), 'method': 'lin_irrad', + 'scalar': 1, 'adder': 50}) gen = Gen('pvwattsv7', points, sam_files, res_file, sites_per_worker=1, output_request=output_request, bias_correct=bc_df) @@ -604,7 +605,8 @@ def test_irrad_bias_correct(): mask = (gen_base.out['cf_profile'] <= gen.out['cf_profile']) assert (mask.sum() / mask.size) > 0.99 - bc_df = pd.DataFrame({'gid': np.arange(100), 'scalar': 1, 'adder': -1500}) + bc_df = pd.DataFrame({'gid': np.arange(100), 'method': 'lin_irrad', + 'scalar': 1, 'adder': -1500}) gen = Gen('pvwattsv7', points, sam_files, res_file, sites_per_worker=1, output_request=output_request, bias_correct=bc_df) gen.run(max_workers=1) diff --git a/tests/test_gen_wind.py b/tests/test_gen_wind.py index 23b5eb660..47e986ca6 100644 --- a/tests/test_gen_wind.py +++ b/tests/test_gen_wind.py @@ -334,14 +334,14 @@ def test_wind_bias_correct(): # run reV 2.0 generation points = slice(0, 10) - pp = ProjectPoints(points, sam_files, 'windpower', res_file=res_file) gen_base = Gen('windpower', points, sam_files, res_file, output_request=('cf_mean', 'cf_profile', 'ws_mean'), sites_per_worker=3) gen_base.run(max_workers=1) outs_base = np.array(list(gen_base.out['cf_mean'])) - bc_df = pd.DataFrame({'gid': np.arange(100), 'scalar': 1, 'adder': 2}) + bc_df = pd.DataFrame({'gid': np.arange(100), 'method': 'lin_ws', + 'scalar': 1, 'adder': 2}) gen = Gen('windpower', points, sam_files, res_file, output_request=('cf_mean', 'cf_profile', 'ws_mean'), sites_per_worker=3, bias_correct=bc_df) @@ -350,7 +350,8 @@ def test_wind_bias_correct(): assert all(outs_bc > outs_base) assert np.allclose(gen_base.out['ws_mean'] + 2, gen.out['ws_mean']) - bc_df = pd.DataFrame({'gid': np.arange(100), 'scalar': 1, 'adder': -100}) + bc_df = pd.DataFrame({'gid': np.arange(100), 'method': 'lin_ws', + 'scalar': 1, 'adder': -100}) gen = Gen('windpower', points, sam_files, res_file, output_request=('cf_mean', 'cf_profile', 'ws_mean'), sites_per_worker=3, bias_correct=bc_df) From d27b9de800568a957d79f8600afe2ac9b7d4bcb2 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 8 Jan 2024 15:18:36 -0700 Subject: [PATCH 2/7] added methods for chunking bias correction tables into smaller tables for memory --- reV/bespoke/bespoke.py | 36 +++++++++++++++++- reV/generation/base.py | 24 +++++++++++- reV/generation/generation.py | 31 +++++++++++++++ reV/supply_curve/points.py | 73 ++++++++++++++++++------------------ tests/test_gen_pv.py | 14 ++++--- 5 files changed, 134 insertions(+), 44 deletions(-) diff --git a/reV/bespoke/bespoke.py b/reV/bespoke/bespoke.py index 6067fdf1f..1245546f5 100644 --- a/reV/bespoke/bespoke.py +++ b/reV/bespoke/bespoke.py @@ -1888,6 +1888,36 @@ def _pre_loaded_data_for_sc_gid(self, sc_gid): return self._pre_loaded_data.get_preloaded_data_for_gid(sc_gid) + def _get_bc_for_gid(self, gid): + """Get the bias correction table trimmed down just for the resource + pixels corresponding to a single supply curve GID. This can help + prevent excess memory usage when doing complex bias correction + distributed to parallel workers. + + Parameters + ---------- + gid : int + SC point gid for site to pull bias correction data for + + Returns + ------- + out : pd.DataFrame | None + If bias_correct was input, this is just the rows from the larger + bias correction table that correspond to the SC point gid + """ + out = self._bias_correct + + if self._bias_correct is not None: + with SupplyCurvePoint(gid, self._excl_fpath, self._tm_dset) as scp: + h5_gids = scp.h5_gid_set + if self._gid_map is not None: + h5_gids = [self._gid_map[g] for g in h5_gids] + + mask = self._bias_correct.index.isin(h5_gids) + out = self._bias_correct[mask] + + return out + @property def outputs(self): """Saved outputs for the multi wind plant bespoke optimization. Keys @@ -2262,7 +2292,7 @@ def run_parallel(self, max_workers=None): slice_lookup=copy.deepcopy(self.slice_lookup), prior_meta=self._get_prior_meta(gid), gid_map=self._gid_map, - bias_correct=self._bias_correct, + bias_correct=self._get_bc_for_gid(gid), pre_loaded_data=self._pre_loaded_data_for_sc_gid(gid))) # gather results @@ -2318,6 +2348,8 @@ def run(self, out_fpath=None, max_workers=None): pre_loaded_data = self._pre_loaded_data_for_sc_gid(gid) afk = self._area_filter_kernel wlm = self._wake_loss_multiplier + i_bc = self._get_bc_for_gid(gid) + si = self.run_serial(self._excl_fpath, self._res_fpath, self._tm_dset, @@ -2342,7 +2374,7 @@ def run(self, out_fpath=None, max_workers=None): slice_lookup=slice_lookup, prior_meta=prior_meta, gid_map=self._gid_map, - bias_correct=self._bias_correct, + bias_correct=i_bc, gids=gid, pre_loaded_data=pre_loaded_data) self._outputs.update(si) diff --git a/reV/generation/base.py b/reV/generation/base.py index 691a4387a..7eb4fe94d 100644 --- a/reV/generation/base.py +++ b/reV/generation/base.py @@ -1134,6 +1134,25 @@ def _pre_split_pc(self, pool_size=None): .format(len(pc_chunks), [len(x) for x in pc_chunks])) return N, pc_chunks + def _reduce_kwargs(self, pc, **kwargs): + """Placeholder for functions that need to reduce the global kwargs that + they send to workers to reduce memory footprint + + Parameters + ---------- + pc : PointsControl + PointsControl object for a single worker chunk + kwargs : dict + Kwargs for all gids that needs to be reduced before being sent to + ``_run_single_worker()`` + + Returns + ------- + kwargs : dict + Same as input but reduced just for the gids in pc + """ + return kwargs + def _parallel_run(self, max_workers=None, pool_size=None, timeout=1800, **kwargs): """Execute parallel compute. @@ -1152,6 +1171,7 @@ def _parallel_run(self, max_workers=None, pool_size=None, timeout=1800, kwargs : dict Keyword arguments to self._run_single_worker(). """ + if pool_size is None: pool_size = os.cpu_count() * 2 if max_workers is None: @@ -1172,7 +1192,9 @@ def _parallel_run(self, max_workers=None, pool_size=None, timeout=1800, with SpawnProcessPool(max_workers=max_workers, loggers=loggers) as exe: for pc in pc_chunk: - future = exe.submit(self._run_single_worker, pc, **kwargs) + pc_kwargs = self._reduce_kwargs(pc, **kwargs) + future = exe.submit(self._run_single_worker, pc, + **pc_kwargs) futures.append(future) chunks[future] = pc diff --git a/reV/generation/generation.py b/reV/generation/generation.py index 78424f35a..a5107aefc 100644 --- a/reV/generation/generation.py +++ b/reV/generation/generation.py @@ -867,6 +867,37 @@ def _parse_output_request(self, req): return list(set(output_request)) + def _reduce_kwargs(self, pc, **kwargs): + """Reduce the global kwargs on a per-worker basis to reduce memory + footprint + + Parameters + ---------- + pc : PointsControl + PointsControl object for a single worker chunk + kwargs : dict + reV generation kwargs for all gids that needs to be reduced before + being sent to ``_run_single_worker()`` + + Returns + ------- + kwargs : dict + Same as input but reduced just for the gids in pc + """ + + gids = pc.project_points.gids + gid_map = kwargs.get('gid_map', None) + bias_correct = kwargs.get('bias_correct', None) + + if bias_correct is not None: + if gid_map is not None: + gids = [gid_map[gid] for gid in gids] + + mask = bias_correct.index.isin(gids) + kwargs['bias_correct'] = bias_correct[mask] + + return kwargs + def run(self, out_fpath=None, max_workers=1, timeout=1800, pool_size=None): """Execute a parallel reV generation run with smart data flushing. diff --git a/reV/supply_curve/points.py b/reV/supply_curve/points.py index 46046c30e..c19f1ed5d 100644 --- a/reV/supply_curve/points.py +++ b/reV/supply_curve/points.py @@ -50,6 +50,25 @@ def __init__(self, gid, exclusion_shape, resolution=64): self._rows, self._cols = self._parse_slices( gid, resolution, exclusion_shape) + @staticmethod + def _ordered_unique(seq): + """Get a list of unique values in the same order as the input sequence. + + Parameters + ---------- + seq : list | tuple + Sequence of values. + + Returns + ------- + seq : list + List of unique values in seq input with original order. + """ + + seen = set() + + return [x for x in seq if not (x in seen or seen.add(x))] + def _parse_slices(self, gid, resolution, exclusion_shape): """Parse inputs for the definition of this SC point. @@ -220,6 +239,8 @@ def __init__(self, gid, excl, tm_dset, excl_dict=None, inclusion_mask=None, super().__init__(gid, exclusion_shape, resolution=resolution) self._gids = self._parse_techmap(tm_dset) + self._h5_gids = self._gids + self._h5_gid_set = None self._incl_mask = inclusion_mask self._incl_mask_flat = None @@ -458,6 +479,22 @@ def h5(self): placeholder for h5 Resource handler object """ + @property + def h5_gid_set(self): + """Get list of unique h5 gids corresponding to this sc point. + + Returns + ------- + h5_gids : list + List of h5 gids. + """ + if self._h5_gid_set is None: + self._h5_gid_set = self._ordered_unique(self._h5_gids) + if -1 in self._h5_gid_set: + self._h5_gid_set.remove(-1) + + return self._h5_gid_set + @property def summary(self): """ @@ -884,7 +921,6 @@ def __init__(self, gid, excl, agg_h5, tm_dset, exclusion_shape=exclusion_shape, close=close) - self._h5_gid_set = None self._h5_fpath, self._h5 = self._parse_h5_file(agg_h5) if gen_index is not None: @@ -988,25 +1024,6 @@ def _map_gen_gids(res_gids, gen_index): return gen_gids, res_gids - @staticmethod - def _ordered_unique(seq): - """Get a list of unique values in the same order as the input sequence. - - Parameters - ---------- - seq : list | tuple - Sequence of values. - - Returns - ------- - seq : list - List of unique values in seq input with original order. - """ - - seen = set() - - return [x for x in seq if not (x in seen or seen.add(x))] - @property def h5(self): """ @@ -1112,22 +1129,6 @@ def offshore(self): return offshore - @property - def h5_gid_set(self): - """Get list of unique h5 gids corresponding to this sc point. - - Returns - ------- - h5_gids : list - List of h5 gids. - """ - if self._h5_gid_set is None: - self._h5_gid_set = self._ordered_unique(self._h5_gids) - if -1 in self._h5_gid_set: - self._h5_gid_set.remove(-1) - - return self._h5_gid_set - @property def gid_counts(self): """Get the sum of the inclusion values in each resource/generation gid diff --git a/tests/test_gen_pv.py b/tests/test_gen_pv.py index 468ce2241..f282eb5cf 100644 --- a/tests/test_gen_pv.py +++ b/tests/test_gen_pv.py @@ -592,24 +592,28 @@ def test_irrad_bias_correct(): sites_per_worker=1, output_request=output_request) gen_base.run(max_workers=1) - bc_df = pd.DataFrame({'gid': np.arange(100), 'method': 'lin_irrad', + bc_df = pd.DataFrame({'gid': np.arange(1, 10), 'method': 'lin_irrad', 'scalar': 1, 'adder': 50}) gen = Gen('pvwattsv7', points, sam_files, res_file, sites_per_worker=1, output_request=output_request, bias_correct=bc_df) gen.run(max_workers=1) - assert (gen_base.out['cf_mean'] < gen.out['cf_mean']).all() - assert (gen_base.out['ghi_mean'] < gen.out['ghi_mean']).all() + assert (gen_base.out['cf_mean'][0] == gen.out['cf_mean'][0]).all() + assert (gen_base.out['ghi_mean'][0] == gen.out['ghi_mean'][0]).all() + assert np.allclose(gen_base.out['cf_profile'][:, 0], + gen.out['cf_profile'][:, 0]) - mask = (gen_base.out['cf_profile'] <= gen.out['cf_profile']) + assert (gen_base.out['cf_mean'][1:] < gen.out['cf_mean'][1:]).all() + assert (gen_base.out['ghi_mean'][1:] < gen.out['ghi_mean'][1:]).all() + mask = (gen_base.out['cf_profile'][:, 1:] <= gen.out['cf_profile'][:, 1:]) assert (mask.sum() / mask.size) > 0.99 bc_df = pd.DataFrame({'gid': np.arange(100), 'method': 'lin_irrad', 'scalar': 1, 'adder': -1500}) gen = Gen('pvwattsv7', points, sam_files, res_file, sites_per_worker=1, output_request=output_request, bias_correct=bc_df) - gen.run(max_workers=1) + gen.run(max_workers=2) for arr in gen.out.values(): assert (arr == 0).all() From ed64313699491da38bba176ee3b396f6d9805af4 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 8 Jan 2024 16:34:44 -0700 Subject: [PATCH 3/7] protect against no relevant bc data and fully excluded supply curve points, added another bespoke test with extrapolation of t/p from single vert level --- reV/SAM/generation.py | 24 ++++++++++++------------ reV/bespoke/bespoke.py | 31 ++++++++++++++++++++----------- tests/test_bespoke.py | 13 ++++++++++++- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/reV/SAM/generation.py b/reV/SAM/generation.py index e697a755c..9b84a4985 100644 --- a/reV/SAM/generation.py +++ b/reV/SAM/generation.py @@ -1911,7 +1911,7 @@ def set_resource_data(self, resource, meta): if 'rh' in resource: # set relative humidity for icing. rh = self.ensure_res_len(resource['rh'].values, time_index) - n_roll = int(meta['timezone'] * self.time_interval) + n_roll = int(meta['timezone'].values * self.time_interval) rh = np.roll(rh, n_roll, axis=0) data_dict['rh'] = rh.tolist() @@ -1919,21 +1919,21 @@ def set_resource_data(self, resource, meta): # ensure that resource array length is multiple of 8760 # roll the truncated resource array to local timezone temp = self.ensure_res_len(resource[var_list].values, time_index) - n_roll = int(meta['timezone'] * self.time_interval) + n_roll = int(meta['timezone'].values * self.time_interval) temp = np.roll(temp, n_roll, axis=0) data_dict['data'] = temp.tolist() - data_dict['lat'] = meta['latitude'] - data_dict['lon'] = meta['longitude'] - data_dict['tz'] = meta['timezone'] - data_dict['elev'] = meta['elevation'] + data_dict['lat'] = float(meta['latitude'].iloc[0]) + data_dict['lon'] = float(meta['longitude'].iloc[0]) + data_dict['tz'] = int(meta['timezone'].iloc[0]) + data_dict['elev'] = float(meta['elevation'].iloc[0]) time_index = self.ensure_res_len(time_index, time_index) - data_dict['minute'] = time_index.minute - data_dict['hour'] = time_index.hour - data_dict['year'] = time_index.year - data_dict['month'] = time_index.month - data_dict['day'] = time_index.day + data_dict['minute'] = time_index.minute.tolist() + data_dict['hour'] = time_index.hour.tolist() + data_dict['year'] = time_index.year.tolist() + data_dict['month'] = time_index.month.tolist() + data_dict['day'] = time_index.day.tolist() # add resource data to self.data and clear self['wind_resource_data'] = data_dict @@ -2088,7 +2088,7 @@ def set_resource_data(self, resource, meta): # roll the truncated resource array to local timezone for var in ['significant_wave_height', 'energy_period']: arr = self.ensure_res_len(resource[var].values, time_index) - n_roll = int(meta['timezone'] * self.time_interval) + n_roll = int(meta['timezone'].values * self.time_interval) data_dict[var] = np.roll(arr, n_roll, axis=0).tolist() data_dict['lat'] = meta['latitude'] diff --git a/reV/bespoke/bespoke.py b/reV/bespoke/bespoke.py index 1245546f5..2b53d6e31 100644 --- a/reV/bespoke/bespoke.py +++ b/reV/bespoke/bespoke.py @@ -567,15 +567,16 @@ def bias_correct_ws(self, ws, dset, h5_gids): out = _parse_bc_table(self._bias_correct, h5_gids) bc_fun, bc_fun_kwargs, bool_bc = out - logger.debug('Bias correcting windspeed with function {} ' - 'for h5 gids: {}'.format(bc_fun, h5_gids)) + if bool_bc.any(): + logger.debug('Bias correcting windspeed with function {} ' + 'for h5 gids: {}'.format(bc_fun, h5_gids)) - bc_fun_kwargs['ws'] = ws[:, bool_bc] - sig = signature(bc_fun) - bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() - if k in sig.parameters} + bc_fun_kwargs['ws'] = ws[:, bool_bc] + sig = signature(bc_fun) + bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() + if k in sig.parameters} - ws[:, bool_bc] = bc_fun(**bc_fun_kwargs) + ws[:, bool_bc] = bc_fun(**bc_fun_kwargs) return ws @@ -1101,9 +1102,9 @@ def run_wind_plant_ts(self): # copy dataset outputs to meta data for supply curve table summary if 'cf_mean-means' in self.outputs: - self._meta['mean_cf'] = self.outputs['cf_mean-means'] + self._meta.loc[:, 'mean_cf'] = self.outputs['cf_mean-means'] if 'lcoe_fcr-means' in self.outputs: - self._meta['mean_lcoe'] = self.outputs['lcoe_fcr-means'] + self._meta.loc[:, 'mean_lcoe'] = self.outputs['lcoe_fcr-means'] self.recalc_lcoe() logger.debug('Timeseries analysis complete!') @@ -1908,8 +1909,16 @@ def _get_bc_for_gid(self, gid): out = self._bias_correct if self._bias_correct is not None: - with SupplyCurvePoint(gid, self._excl_fpath, self._tm_dset) as scp: - h5_gids = scp.h5_gid_set + h5_gids = [] + try: + scp_kwargs = dict(gid=gid, excl=self._excl_fpath, + tm_dset=self._tm_dset, + resolution=self._resolution) + with SupplyCurvePoint(**scp_kwargs) as scp: + h5_gids = scp.h5_gid_set + except EmptySupplyCurvePointError: + pass + if self._gid_map is not None: h5_gids = [self._gid_map[g] for g in h5_gids] diff --git a/tests/test_bespoke.py b/tests/test_bespoke.py index 408f36994..1dfd6a7df 100644 --- a/tests/test_bespoke.py +++ b/tests/test_bespoke.py @@ -913,7 +913,11 @@ def test_bespoke_aep_is_zero_if_no_turbines_placed(): def test_bespoke_prior_run(): """Test a follow-on bespoke timeseries generation run based on a prior - plant layout optimization.""" + plant layout optimization. + + Also added another minor test with extrapolation of t/p datasets from a + single vertical level (e.g., with Sup3rCC data) + """ output_request = ('system_capacity', 'cf_mean', 'cf_profile', 'extra_unused_data') with tempfile.TemporaryDirectory() as td: @@ -925,6 +929,13 @@ def test_bespoke_prior_run(): shutil.copy(RES.format(2012), res_fp.format(2012)) shutil.copy(RES.format(2013), res_fp.format(2013)) + # test t/p extrapolation from single level (e.g. with Sup3rCC data) + del_dsets = ('pressure_100m', 'pressure_200m', 'temperature_80m') + for y in (2012, 2013): + with h5py.File(res_fp.format(y), 'a') as h5: + for dset in del_dsets: + del h5[dset] + res_fp_all = res_fp.format('*') res_fp_2013 = res_fp.format('2013') From a122be97c847b20454d4ab72216fe2cf34630426 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 12 Jan 2024 13:57:45 -0700 Subject: [PATCH 4/7] updates per rex PR review --- reV/bespoke/bespoke.py | 4 ++-- requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/reV/bespoke/bespoke.py b/reV/bespoke/bespoke.py index 2b53d6e31..6694570e2 100644 --- a/reV/bespoke/bespoke.py +++ b/reV/bespoke/bespoke.py @@ -30,7 +30,7 @@ FileInputError) from reV.utilities import log_versions, ModuleName -from rex.utilities.bc_utils import _parse_bc_table +from rex.utilities.bc_parse_table import parse_bc_table from rex.joint_pd.joint_pd import JointPD from rex.renewable_resource import WindResource from rex.multi_year_resource import MultiYearWindResource @@ -564,7 +564,7 @@ def bias_correct_ws(self, ws, dset, h5_gids): if self._bias_correct is not None and dset.startswith('windspeed_'): - out = _parse_bc_table(self._bias_correct, h5_gids) + out = parse_bc_table(self._bias_correct, h5_gids) bc_fun, bc_fun_kwargs, bool_bc = out if bool_bc.any(): diff --git a/requirements.txt b/requirements.txt index dd7bc7777..b7517c364 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ NREL-gaps>=0.6.9 NREL-NRWAL>=0.0.7 NREL-PySAM~=4.1.0 -NREL-rex>=0.2.80 +NREL-rex>=0.2.85 packaging>=20.3 plotly>=4.7.1 plotting>=0.0.6 From be3d16e6490c1ba73b42246b9d4e7e77282b3ed7 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Tue, 16 Jan 2024 15:14:27 -0700 Subject: [PATCH 5/7] fix dataframe vs series meta data to rev-sam set resource methods --- reV/SAM/generation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/reV/SAM/generation.py b/reV/SAM/generation.py index 9b84a4985..e28c7f447 100644 --- a/reV/SAM/generation.py +++ b/reV/SAM/generation.py @@ -697,6 +697,7 @@ def set_resource_data(self, resource, meta): and timezone. """ + meta = self._parse_meta(meta) time_index = resource.index self.time_interval = self.get_time_interval(resource.index.values) @@ -1883,6 +1884,8 @@ def set_resource_data(self, resource, meta): and timezone. """ + meta = self._parse_meta(meta) + # map resource data names to SAM required data names var_map = {'speed': 'windspeed', 'direction': 'winddirection', @@ -1911,7 +1914,7 @@ def set_resource_data(self, resource, meta): if 'rh' in resource: # set relative humidity for icing. rh = self.ensure_res_len(resource['rh'].values, time_index) - n_roll = int(meta['timezone'].values * self.time_interval) + n_roll = int(meta['timezone'] * self.time_interval) rh = np.roll(rh, n_roll, axis=0) data_dict['rh'] = rh.tolist() @@ -1919,14 +1922,14 @@ def set_resource_data(self, resource, meta): # ensure that resource array length is multiple of 8760 # roll the truncated resource array to local timezone temp = self.ensure_res_len(resource[var_list].values, time_index) - n_roll = int(meta['timezone'].values * self.time_interval) + n_roll = int(meta['timezone'] * self.time_interval) temp = np.roll(temp, n_roll, axis=0) data_dict['data'] = temp.tolist() - data_dict['lat'] = float(meta['latitude'].iloc[0]) - data_dict['lon'] = float(meta['longitude'].iloc[0]) - data_dict['tz'] = int(meta['timezone'].iloc[0]) - data_dict['elev'] = float(meta['elevation'].iloc[0]) + data_dict['lat'] = float(meta['latitude']) + data_dict['lon'] = float(meta['longitude']) + data_dict['tz'] = int(meta['timezone']) + data_dict['elev'] = float(meta['elevation']) time_index = self.ensure_res_len(time_index, time_index) data_dict['minute'] = time_index.minute.tolist() From c15084ee5ec8dafed79ba0e47ac265995eac1fbd Mon Sep 17 00:00:00 2001 From: grantbuster Date: Tue, 16 Jan 2024 15:15:08 -0700 Subject: [PATCH 6/7] bump version for new bias correction integration with rex --- reV/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reV/version.py b/reV/version.py index 39630597e..73761c6a2 100644 --- a/reV/version.py +++ b/reV/version.py @@ -2,4 +2,4 @@ reV Version number """ -__version__ = "0.8.4" +__version__ = "0.8.5" From 934b71e036e83a3bac7d155d3bc79b40e9f600fe Mon Sep 17 00:00:00 2001 From: grantbuster Date: Tue, 16 Jan 2024 16:39:50 -0700 Subject: [PATCH 7/7] fix meta dtype for sam resource for mhk --- reV/SAM/generation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/reV/SAM/generation.py b/reV/SAM/generation.py index e28c7f447..f1e01e5b9 100644 --- a/reV/SAM/generation.py +++ b/reV/SAM/generation.py @@ -548,6 +548,7 @@ def set_resource_data(self, resource, meta): location. Should include values for latitude, longitude, elevation, and timezone. """ + meta = self._parse_meta(meta) self.time_interval = self.get_time_interval(resource.index.values) pysam_w_fname = self._create_pysam_wfile(resource, meta) self[self.PYSAM_WEATHER_TAG] = pysam_w_fname @@ -2066,6 +2067,8 @@ def set_resource_data(self, resource, meta): and timezone. """ + meta = self._parse_meta(meta) + # map resource data names to SAM required data names var_map = {'significantwaveheight': 'significant_wave_height', 'waveheight': 'significant_wave_height', @@ -2091,7 +2094,7 @@ def set_resource_data(self, resource, meta): # roll the truncated resource array to local timezone for var in ['significant_wave_height', 'energy_period']: arr = self.ensure_res_len(resource[var].values, time_index) - n_roll = int(meta['timezone'].values * self.time_interval) + n_roll = int(meta['timezone'] * self.time_interval) data_dict[var] = np.roll(arr, n_roll, axis=0).tolist() data_dict['lat'] = meta['latitude']