From f10d580b26ffa24aecce78d0b3557d03beef6a33 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 9 Jan 2025 14:33:18 -0700 Subject: [PATCH 1/2] fix utility function that wouldnt allow rev to use s3 filepaths --- rex/utilities/utilities.py | 15 +++++++++++++-- rex/version.py | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/rex/utilities/utilities.py b/rex/utilities/utilities.py index 8d158858..5f5d54e6 100644 --- a/rex/utilities/utilities.py +++ b/rex/utilities/utilities.py @@ -299,13 +299,15 @@ def check_res_file(res_file): - It belongs to a multi-file handler - Is on local disk - Is a hsds path + - Is as S3 path (starts with "s3://" Parameters ---------- res_file : str Filepath to single resource file, unix style multi-file path like - /h5_dir/prefix*suffix.h5, or an hsds filepath (filename of hsds - path can also contain wildcards *) + /h5_dir/prefix*suffix.h5, an hsds filepath + (filename of hsds path can also contain wildcards *), or + an s3 filepath starting with "s3://" Returns ------- @@ -321,6 +323,15 @@ def check_res_file(res_file): if os.path.isfile(res_file): pass + elif res_file.startswith('s3://'): + try: + import fsspec + except Exception as e: + msg = (f'Tried to open s3 file path: "{res_file}" with ' + 'fsspec but could not import, try ' + '`pip install NREL-rex[s3]`') + raise ImportError(msg) from e + elif '*' in res_file: multi_h5_res = True diff --git a/rex/version.py b/rex/version.py index bd9e91f1..e4abe3fa 100644 --- a/rex/version.py +++ b/rex/version.py @@ -1,3 +1,3 @@ """rex Version number""" -__version__ = "0.2.96" +__version__ = "0.2.97" From 674cd48bdb25a118aaffed0be6070cb32646b493 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 9 Jan 2025 14:46:47 -0700 Subject: [PATCH 2/2] reduce check_res_file complexity --- rex/utilities/utilities.py | 76 +++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/rex/utilities/utilities.py b/rex/utilities/utilities.py index 
5f5d54e6..68c2ad53 100644 --- a/rex/utilities/utilities.py +++ b/rex/utilities/utilities.py @@ -317,15 +317,18 @@ def check_res_file(res_file): Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS behind HSDS """ + multi_h5_res = False hsds = False + bad = True if os.path.isfile(res_file): - pass + bad = False elif res_file.startswith('s3://'): try: - import fsspec + import fsspec # pylint: disable=unused-import + bad = False except Exception as e: msg = (f'Tried to open s3 file path: "{res_file}" with ' 'fsspec but could not import, try ' @@ -333,33 +336,63 @@ def check_res_file(res_file): raise ImportError(msg) from e elif '*' in res_file: + bad = False multi_h5_res = True elif os.path.isdir(res_file): + bad = False msg = ('Cannot parse directory, need to add wildcard * suffix: {}' .format(res_file)) raise FileInputError(msg) else: - try: - import h5pyd - hsds_dir = os.path.dirname(res_file) - with h5pyd.Folder(hsds_dir + '/') as f: - hsds = True - fps = [f'{hsds_dir}/{fn}' for fn in f - if fnmatch(f'{hsds_dir}/{fn}', res_file)] - if not any(fps): - msg = ('{} is not a valid HSDS file path!' - .format(res_file)) - raise FileNotFoundError(msg) - elif len(fps) > 1: - multi_h5_res = True - - except Exception as ex: - msg = ("{} is not a valid file path, and HSDS " - "cannot be check for a file at this path:{}!" - .format(res_file, ex)) - raise FileNotFoundError(msg) from ex + multi_h5_res, hsds = check_hsds_file(res_file) + bad = not hsds + + if bad: + msg = ("{} is not a valid file path, and HSDS " + "cannot be check for a file at this path!" 
+                   .format(res_file))
+        raise FileNotFoundError(msg)
+
+    return multi_h5_res, hsds
+
+
+def check_hsds_file(res_file):
+    """
+    Check whether an HSDS path matches one or several hosted files.
+
+    Parameters
+    ----------
+    res_file : str
+        HSDS filepath (filename of hsds path can also contain wildcards *)
+
+    Returns
+    -------
+    multi_h5_res : bool
+        True if more than one file in the HSDS folder matches res_file
+    hsds : bool
+        Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
+        behind HSDS, always True when this function returns
+
+    Raises
+    ------
+    FileNotFoundError
+        If no file in the HSDS folder matches res_file
+    """
+    import h5pyd
+    hsds_dir = os.path.dirname(res_file)
+
+    with h5pyd.Folder(hsds_dir + '/') as f:
+        hsds = True
+        fps = [f'{hsds_dir}/{fn}' for fn in f
+               if fnmatch(f'{hsds_dir}/{fn}', res_file)]
+        if not any(fps):
+            msg = ('{} is not a valid HSDS file path!'
+                   .format(res_file))
+            raise FileNotFoundError(msg)
+        # Bind on every path: exactly one match must yield False
+        multi_h5_res = len(fps) > 1
 
     return multi_h5_res, hsds
 
@@ -572,6 +605,7 @@ class Retry:
     """
     Retry Decorator to run a function multiple times
     """
+
     def __init__(self, tries=3, n_sec=1):
         """
         Parameters