From 827c7a6ea10d46f0bcf40341c6a428cdec355841 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 22 Aug 2023 12:20:15 +0200 Subject: [PATCH] Simplify regex - Directly extract radar_code - Remove timezone element and make it part of .* extra - Update documentation --- src/vptstools/s3.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/vptstools/s3.py b/src/vptstools/s3.py index 8187477..b0e90c8 100644 --- a/src/vptstools/s3.py +++ b/src/vptstools/s3.py @@ -88,24 +88,22 @@ def parse_file_name(file_name): Notes ----- - File format is according to the following file format:: + File names are expected to have the following format:: - ccrrr_vp_yyyymmddhhmmss.h5 + radar_type_yyyymmddThhmmextra.h5 - with ``c`` the country code two-letter ids and ``rrr`` - the radar three-letter id, e.g. bejab_vp_20161120235500.h5. - Path information in front of the h5 name itself are ignored. + with ``radar`` the 5-letter radar code, ``type`` the data type, + ``yyyymmdd`` the date and ``hhmm`` the hours and minutes. + ``T`` is optional, ``extra`` is ignored. """ name_regex = re.compile( - r".*([a-zA-Z]{2})([a-zA-Z]{3})_([a-z]*)_(\d\d\d\d)(\d\d)(\d\d)T?" - r"(\d\d)(\d\d)(?:Z|00)?.*\.h5" + r".*([a-zA-Z]{5})_([a-z]*)_(\d\d\d\d)(\d\d)(\d\d)T?(\d\d)(\d\d).*\.h5" ) match = re.match(name_regex, file_name) if match: file_name = Path(file_name).name - country, radar, data_type, year, month, day, hour, minute = match.groups() - radar_code = country + radar + radar_code, data_type, year, month, day, hour, minute = match.groups() return radar_code.lower(), data_type, year, month, day, hour, minute, file_name else: raise ValueError(f"File name {file_name} is not a valid ODIM h5 file.")