From b7583a34f84b427e164e32c3d5d3b76db9e1964c Mon Sep 17 00:00:00 2001 From: Lawrence <33462079+lawrence-mbf@users.noreply.github.com> Date: Thu, 27 Jul 2023 18:39:42 +0000 Subject: [PATCH] Replace and Update Datetime Parser (#525) * add timestamp2datetime function * Replace datenum8601 with internal datetime parser * Revert error id change for correctType function * Explicitly allow datetime cell arrays Datetimes cannot be concatenated if their time zone values are different for some reason. * fix datetime logic error * Fix time zone formatting * Fix correctType behavior with cell datetimes * Refactor and fix tests - Slightly shorten some test data. - Fix datetime comparison precision. * Broaden datetime detection from string Includes MATLAB default common times and moves formatting into correctType instead. * Fix missing format call when reading ISO8601 * Cover all datetime parse failure error ids * Loosen datetime comparisons in Python testing * Fix logic error when comparing MATLAB datetimes * Fix DataStub loading char matrices New MATLAB versions might return an array of strings from h5load(). This was not handled correctly beforehand and accidentally created char matrices. These now convert string arrays properly to cell arrays of character vectors. 
--------- Co-authored-by: Lawrence Niu --- +io/mapData2H5.m | 9 +- +io/timestamp2datetime.m | 123 +++++++ +tests/+system/PyNWBIOTest.py | 67 ++-- +tests/+unit/dataStubTest.m | 6 +- +tests/+util/verifyContainerEqual.m | 74 ++--- +types/+untyped/DataStub.m | 2 +- +types/+util/correctType.m | 334 ++++++++------------ external_packages/datenum8601/datenum8601.m | 245 -------------- external_packages/datenum8601/license.txt | 24 -- 9 files changed, 334 insertions(+), 550 deletions(-) create mode 100644 +io/timestamp2datetime.m delete mode 100644 external_packages/datenum8601/datenum8601.m delete mode 100644 external_packages/datenum8601/license.txt diff --git a/+io/mapData2H5.m b/+io/mapData2H5.m index cf66ca94..7aba50ad 100644 --- a/+io/mapData2H5.m +++ b/+io/mapData2H5.m @@ -7,9 +7,12 @@ forceChunked = any(strcmp('forceChunking', varargin)); if iscell(data) - assert(iscellstr(data), 'NWB:MapData:NonCellStr', ['Cell arrays must be ' ... - 'cell arrays of character vectors. Cell arrays of other types are ' ... - 'not supported.']); + assert(... + iscellstr(data) ... + || all(cellfun('isclass', data, 'datetime')) ... + || all(cellfun('isclass', data, 'string')) ... + , 'NWB:MapData:NonCellStr', ['Cell arrays must be cell arrays of character vectors. ' ... + 'Cell arrays of other types are not supported.']); end tid = io.getBaseType(class(data)); diff --git a/+io/timestamp2datetime.m b/+io/timestamp2datetime.m new file mode 100644 index 00000000..a736f7cc --- /dev/null +++ b/+io/timestamp2datetime.m @@ -0,0 +1,123 @@ +function Datetimes = timestamp2datetime(timestamps) + %TIMESTAMP2DATETIME converts string timestamps to MATLAB datetime object + + timestamps = timestamp2cellstr(timestamps); + for iTimestamp = 1:length(timestamps) + timestampString = timestamps{iTimestamp}; + try + Datetime = datetime(timestampString); + catch ME + unrecognizedStringId = { ... + 'MATLAB:datetime:UnrecognizedDateStringSuggestLocale' ... 
+ , 'MATLAB:datetime:UnrecognizedDateStringsSuggestLocale' ... + }; % missing plural strings lmao + if any(strcmp(ME.identifier, unrecognizedStringId)) + Datetime = detectDatetime(timestampString); + else + rethrow(ME); + end + end + Datetimes(iTimestamp) = Datetime; + end +end + +function Datetime = detectDatetime(timestamp) + errorId = 'NWB:InvalidTimestamp'; + errorTemplate = sprintf('Timestamp `%s` is not a valid ISO8601 subset for NWB:\n %%s', timestamp); + Datetime = datetime(0, 0, 0, 0, 0, 0, 0); + %% YMoD + hmsStart = find(timestamp == 'T', 1); + if isempty(hmsStart) + ymdStamp = timestamp; + else + ymdStamp = extractBefore(timestamp, hmsStart); + end + errorMessage = sprintf(errorTemplate, 'YMD should be in the form YYYY-mm-dd or YYYYmmdd'); + if contains(ymdStamp, '-') + assert(length(ymdStamp) == 10, errorId, errorMessage); + YmdToken = struct(... + 'Year', ymdStamp(1:4) ... + , 'Month', ymdStamp(6:7) ... + , 'Day', ymdStamp(9:10) ... + ); + else + assert(length(ymdStamp) == 8, errorId, errorMessage); + YmdToken = struct(... + 'Year', ymdStamp(1:4) ... + , 'Month', ymdStamp(5:6) ... + , 'Day', ymdStamp(7:8) ... + ); + end + Datetime.Year = str2double(YmdToken.Year); + Datetime.Month = str2double(YmdToken.Month); + Datetime.Day = str2double(YmdToken.Day); + assert(~isnat(Datetime), errorId, sprintf(errorTemplate, 'non-numeric YMD values detected')); + + %% HMiS TZ + if isempty(hmsStart) + return; + end + afterDateStamp = extractAfter(timestamp, 'T'); % needs to do this so we don't have wrong '-' checks. + timeZoneStart = find(afterDateStamp == 'Z' | afterDateStamp == '+' | afterDateStamp == '-', 1); + if isempty(timeZoneStart) + hmsStamp = afterDateStamp; + else + hmsStamp = extractBefore(afterDateStamp, timeZoneStart); + end + errorMessage = sprintf(errorTemplate ... 
+ , 'H:m:s should be in the form HH:mm:ss.ssssss or HHmmss.ssssss'); + if contains(hmsStamp, ':') + % note, seconds must be at least 2 digits + assert(length(hmsStamp) >= 8, errorId, errorMessage); + HmsToken = struct(... + 'Hour', hmsStamp(1:2) ... + , 'Minute', hmsStamp(4:5) ... + , 'Second', hmsStamp(7:end) ... + ); + else + assert(length(hmsStamp) >= 6, errorId, errorMessage); + HmsToken = struct(... + 'Hour', hmsStamp(1:2) ... + , 'Minute', hmsStamp(3:4) ... + , 'Second', hmsStamp(5:end) ... + ); + end + Datetime.Hour = str2double(HmsToken.Hour); + Datetime.Minute = str2double(HmsToken.Minute); + Datetime.Second = str2double(HmsToken.Second); + assert(~isnat(Datetime), errorId, sprintf(errorTemplate, 'non-numeric H:m:s values detected')); + + %% TimeZone + if isempty(timeZoneStart) + return; + end + timeZoneStamp = afterDateStamp(timeZoneStart:end); + try + Datetime.TimeZone = timeZoneStamp; + catch ME + Cause = MException(errorId ... + , sprintf(errorTemplate, sprintf('invalid time zone `%s` provided', timeZoneStamp))); + addCause(ME, Cause); + throwAsCaller(ME); + end +end + +function cells = timestamp2cellstr(timestamps) + if isstring(timestamps) + cells = num2cell(timestamps); + for iString = 1:length(cells) + cells{iString} = char(cells{iString}); + end + elseif iscell(timestamps) + cells = cell(size(timestamps)); + for iElement = 1:length(timestamps) + cells(iElement) = timestamp2cellstr(timestamps{iElement}); + end + elseif ischar(timestamps) + cells = {timestamps}; + else + error(['timestamps must be a ' ... 
+ , 'string, character array, or cell array of strings/character arrays.']); + end +end + diff --git a/+tests/+system/PyNWBIOTest.py b/+tests/+system/PyNWBIOTest.py index 2962409d..212a6b79 100644 --- a/+tests/+system/PyNWBIOTest.py +++ b/+tests/+system/PyNWBIOTest.py @@ -1,5 +1,5 @@ import unittest -from datetime import datetime +from datetime import datetime, timedelta import os.path import numpy as np from dateutil.tz import tzlocal, tzoffset @@ -64,47 +64,48 @@ def assertContainerEqual(self, container1, container2): # noqa: C901 container_fields = container1.__fields__ for nwbfield in container_fields: with self.subTest(nwbfield=nwbfield, container_type=type1.__name__): - f1 = getattr(container1, nwbfield) - f2 = getattr(container2, nwbfield) - if isinstance(f1, (tuple, list, np.ndarray)): - if len(f1) > 0: - if isinstance(f1[0], Container): - for sub1, sub2 in zip(f1, f2): + field1 = getattr(container1, nwbfield) + field2 = getattr(container2, nwbfield) + if isinstance(field1, (tuple, list, np.ndarray)): + if len(field1) > 0: + if isinstance(field1[0], Container): + for sub1, sub2 in zip(field1, field2): self.assertContainerEqual(sub1, sub2) - elif isinstance(f1[0], Data): - for sub1, sub2 in zip(f1, f2): + elif isinstance(field1[0], Data): + for sub1, sub2 in zip(field1, field2): self.assertDataEqual(sub1, sub2) continue else: - self.assertEqual(len(f1), len(f2)) - if len(f1) == 0: + self.assertEqual(len(field1), len(field2)) + if len(field1) == 0: continue - if isinstance(f1[0], float): - for v1, v2 in zip(f1, f2): + if isinstance(field1[0], float): + for v1, v2 in zip(field1, field2): self.assertAlmostEqual(v1, v2, places=6) else: - self.assertTrue(np.array_equal(f1, f2)) - elif isinstance(f1, dict) and len(f1) and isinstance(next(iter(f1.values())), Container): - f1_keys = set(f1.keys()) - f2_keys = set(f2.keys()) - self.assertSetEqual(f1_keys, f2_keys) - for k in f1_keys: + self.assertTrue(np.array_equal(field1, field2)) + elif isinstance(field1, dict) 
and len(field1) and isinstance(next(iter(field1.values())), Container): + field1_keys = set(field1.keys()) + field2_keys = set(field2.keys()) + self.assertSetEqual(field1_keys, field2_keys) + for k in field1_keys: with self.subTest(module_name=k): - self.assertContainerEqual(f1[k], f2[k]) - elif isinstance(f1, Container) or isinstance(f1, Container): - self.assertContainerEqual(f1, f2) - elif isinstance(f1, Data) or isinstance(f2, Data): - if isinstance(f1, Data) and isinstance(f2, Data): - self.assertDataEqual(f1, f2) - elif isinstance(f1, Data): - self.assertTrue(np.array_equal(f1.data, f2)) - elif isinstance(f2, Data): - self.assertTrue(np.array_equal(f1.data, f2)) + self.assertContainerEqual(field1[k], field2[k]) + elif isinstance(field1, Container) or isinstance(field1, Container): + self.assertContainerEqual(field1, field2) + elif isinstance(field1, Data) and isinstance(field2, Data): + self.assertDataEqual(field1, field2) + elif isinstance(field1, Data) or isinstance(field2, Data): + self.assertTrue(np.array_equal(field1.data, field2)) + elif isinstance(field1, (float, np.float32, np.float16, h5py.Dataset)): + npt.assert_almost_equal(field1, field2) + elif isinstance(field1, datetime): + self.assertTrue(isinstance(field2, datetime)) + field1_upper = field1 + timedelta(milliseconds = 1) + field1_lower = field1 - timedelta(milliseconds = 1) + self.assertTrue(field2 >= field1_lower and field2 <= field1_upper) else: - if isinstance(f1, (float, np.float32, np.float16, h5py.Dataset)): - npt.assert_almost_equal(f1, f2) - else: - self.assertEqual(f1, f2) + self.assertEqual(field1, field2) def assertDataEqual(self, data1, data2): self.assertEqual(type(data1), type(data2)) diff --git a/+tests/+unit/dataStubTest.m b/+tests/+unit/dataStubTest.m index 0070e0ba..8bf01384 100644 --- a/+tests/+unit/dataStubTest.m +++ b/+tests/+unit/dataStubTest.m @@ -14,13 +14,11 @@ function setup(testCase) end function testRegionRead(testCase) -date = datetime(2018, 3, 1, 12, 0, 0); 
-session_start_time = datetime(date,'Format','yyyy-MM-dd''T''HH:mm:SSZZ',... - 'TimeZone','local'); + nwb = NwbFile(... 'session_description', 'a test NWB File', ... 'identifier', 'mouse004_day4', ... - 'session_start_time', session_start_time); + 'session_start_time', datetime(2018, 3, 1, 12, 0, 0, 'TimeZone', 'local')); data = reshape(1:5000, 25, 5, 4, 2, 5); diff --git a/+tests/+util/verifyContainerEqual.m b/+tests/+util/verifyContainerEqual.m index 1bf3179f..8dd722c4 100644 --- a/+tests/+util/verifyContainerEqual.m +++ b/+tests/+util/verifyContainerEqual.m @@ -11,54 +11,56 @@ function verifyContainerEqual(testCase, actual, expected, ignoreList) for i = 1:numel(props) prop = props{i}; - actualVal = actual.(prop); - expectedVal = expected.(prop); - failmsg = ['Values for property ''' prop ''' are not equal']; + actualValue = actual.(prop); + expectedValue = expected.(prop); + failureMessage = ['Values for property ''' prop ''' are not equal']; - if isa(actualVal, 'types.untyped.DataStub') - actualVal = actualVal.load(); + if isa(actualValue, 'types.untyped.DataStub') + actualValue = actualValue.load(); end - if startsWith(class(expectedVal), 'types.') && ~startsWith(class(expectedVal), 'types.untyped') - tests.util.verifyContainerEqual(testCase, actualVal, expectedVal); - elseif isa(expectedVal, 'types.untyped.Set') - tests.util.verifySetEqual(testCase, actualVal, expectedVal, failmsg); - elseif ischar(expectedVal) - testCase.verifyEqual(char(actualVal), expectedVal, failmsg); - elseif isa(expectedVal, 'types.untyped.ObjectView') || isa(expectedVal, 'types.untyped.SoftLink') - testCase.verifyEqual(actualVal.path, expectedVal.path, failmsg); - elseif isa(expectedVal, 'types.untyped.RegionView') - testCase.verifyEqual(actualVal.path, expectedVal.path, failmsg); - testCase.verifyEqual(actualVal.region, expectedVal.region, failmsg); - elseif isa(expectedVal, 'types.untyped.Anon') - testCase.verifyEqual(actualVal.name, expectedVal.name, failmsg); - 
tests.util.verifyContainerEqual(testCase, actualVal.value, expectedVal.value); - elseif isdatetime(expectedVal)... - || (iscell(expectedVal) && all(cellfun('isclass', expectedVal, 'datetime'))) + if startsWith(class(expectedValue), 'types.') && ~startsWith(class(expectedValue), 'types.untyped') + tests.util.verifyContainerEqual(testCase, actualValue, expectedValue); + elseif isa(expectedValue, 'types.untyped.Set') + tests.util.verifySetEqual(testCase, actualValue, expectedValue, failureMessage); + elseif ischar(expectedValue) + testCase.verifyEqual(char(actualValue), expectedValue, failureMessage); + elseif isa(expectedValue, 'types.untyped.ObjectView') || isa(expectedValue, 'types.untyped.SoftLink') + testCase.verifyEqual(actualValue.path, expectedValue.path, failureMessage); + elseif isa(expectedValue, 'types.untyped.RegionView') + testCase.verifyEqual(actualValue.path, expectedValue.path, failureMessage); + testCase.verifyEqual(actualValue.region, expectedValue.region, failureMessage); + elseif isa(expectedValue, 'types.untyped.Anon') + testCase.verifyEqual(actualValue.name, expectedValue.name, failureMessage); + tests.util.verifyContainerEqual(testCase, actualValue.value, expectedValue.value); + elseif isdatetime(expectedValue)... + || (iscell(expectedValue) && all(cellfun('isclass', expectedValue, 'datetime'))) % linux MATLAB doesn't appear to propery compare datetimes whereas % Windows MATLAB does. This is a workaround to get tests to work % while getting close enough to exact date representation. 
- actualVal = types.util.checkDtype(prop, 'datetime', actualVal); - if ~iscell(expectedVal) - expectedVal = {expectedVal}; + actualValue = types.util.checkDtype(prop, 'datetime', actualValue); + if ~iscell(expectedValue) + expectedValue = num2cell(expectedValue); end - if ~iscell(actualVal) - actualVal = {actualVal}; + if ~iscell(actualValue) + actualValue = num2cell(actualValue); end - for iDates = 1:length(expectedVal) + for iDates = 1:length(expectedValue) % ignore microseconds as linux datetime has some strange error % even when datetime doesn't change in Windows. - actualNtfs = convertTo(actualVal{iDates}, 'ntfs'); - expectedNtfs = convertTo(expectedVal{iDates}, 'ntfs'); - testCase.verifyGreaterThanOrEqual(actualNtfs, expectedNtfs - 10, failmsg); - testCase.verifyLessThanOrEqual(actualNtfs, expectedNtfs + 10, failmsg); + ActualDate = actualValue{iDates}; + ExpectedDate = expectedValue{iDates}; + ExpectedUpperBound = ExpectedDate + milliseconds(1); + ExpectedLowerBound = ExpectedDate - milliseconds(1); + testCase.verifyTrue(isbetween(ActualDate, ExpectedLowerBound, ExpectedUpperBound) ... 
+ , failureMessage); end - elseif startsWith(class(expectedVal), 'int') - testCase.verifyEqual(int64(actualVal), int64(expectedVal), failmsg); - elseif startsWith(class(expectedVal), 'uint') - testCase.verifyEqual(uint64(actualVal), uint64(expectedVal), failmsg); + elseif startsWith(class(expectedValue), 'int') + testCase.verifyEqual(int64(actualValue), int64(expectedValue), failureMessage); + elseif startsWith(class(expectedValue), 'uint') + testCase.verifyEqual(uint64(actualValue), uint64(expectedValue), failureMessage); else - testCase.verifyEqual(actualVal, expectedVal, failmsg); + testCase.verifyEqual(actualValue, expectedValue, failureMessage); end end end \ No newline at end of file diff --git a/+types/+untyped/DataStub.m b/+types/+untyped/DataStub.m index ef66c6f5..06eacfd5 100644 --- a/+types/+untyped/DataStub.m +++ b/+types/+untyped/DataStub.m @@ -88,7 +88,7 @@ if iscellstr(data) && isscalar(data) data = data{1}; elseif isstring(data) - data = char(data); + data = convertStringsToChars(data); end case 'logical' % data assumed to be cell array of enum string diff --git a/+types/+util/correctType.m b/+types/+util/correctType.m index ce216757..1776274e 100644 --- a/+types/+util/correctType.m +++ b/+types/+util/correctType.m @@ -1,215 +1,141 @@ function val = correctType(val, type) -%CORRECTTYPE -% Will error if type is simply incompatible -% Will throw if casting to primitive type "type" is impossible - -invalidConversionErrorId = 'NWB:TypeCorrection:InvalidConversion'; -invalidConversionErrorMessage = sprintf( ... - 'Value of type `%s` cannot be converted to type `%s`.', class(val), type); - -switch type - case 'char' - assert(isstring(val) || ischar(val) || iscellstr(val), ... - invalidConversionErrorId, ... - invalidConversionErrorMessage); - case 'datetime' - isHeterogeneousCell = iscell(val) ... - && all(... - cellfun('isclass', val, 'char') ... - | cellfun('isclass', val, 'string')... - | cellfun('isclass', val, 'datetime')); - assert(ischar(val)... 
- || isdatetime(val) ... - || isstring(val) ... - || isHeterogeneousCell, ... - invalidConversionErrorId, invalidConversionErrorMessage); - - % convert strings to datetimes - if ischar(val) || isstring(val) || iscell(val) - val = str2dates(val); - end - - % coerce time zone and specific output format. - val.TimeZone = 'local'; - val.Format = 'yyyy-MM-dd''T''HH:mm:ss.SSSSSSZZZZZ'; - case {'single', 'double', 'int64', 'int32', 'int16', 'int8', 'uint64', ... - 'uint32', 'uint16', 'uint8'} - assert(ischar(val) ... - || iscellstr(val) ... - || isstring(val) ... - || isnumeric(val)); - - if ischar(val) || iscellstr(val) || isstring(val) - val = str2double(val); - end - - % find nearest type and convert if necessary. - nearestType = findNearestType(val, type); - if ~strcmp(nearestType, class(val)) - castedValue = cast(val, nearestType); - assert(isequal(castedValue, val), ... - 'NWB:TypeCorrection:PrecisionLossDetected', ... - ['Could not convert data value of type `%s` to type `%s`. ' ... - 'Precision loss detected.'], ... - class(val), type); - val = castedValue; - end - case 'logical' - val = logical(val); - otherwise % type may refer to an object or even a link - assert(isa(val, type), ... - invalidConversionErrorId, ... - invalidConversionErrorMessage); -end -end - -function nearestType = findNearestType(val, type) -%FINDNEARESTTYPE given a value of some type. Find the nearest equivalent -%type whose size matches that of the preferred type but can still hold the -%stored value. - -dataLossWarnId = 'NWB:TypeCorrection:DataLoss'; -dataLossWarnMessageFormat = ['Converting value of type `%s` to type ' ... - '`%s` may drop data precision.']; - -if ~isreal(val) - warning(dataLossWarnId, dataLossWarnMessageFormat, ... 
- class(val), type); - val = real(val); -end - -if strcmp(type, 'numeric') || strcmp(class(val), type) - nearestType = class(val); - return; -end - -isTypeFloat = any(strcmp(type, {'single', 'double'})); -isTypeUnsigned = ~isTypeFloat && startsWith(type, 'u'); -isValueTypeUnsigned = ~isfloat(val) && startsWith(class(val), 'u'); - -valueTypeBitSize = 8 * io.getMatTypeSize(class(val)); -preferredTypeBitSize = 8 * io.getMatTypeSize(type); -idealTypeBitSize = max(valueTypeBitSize, preferredTypeBitSize); - -% In certain classes of conversion, simply scaling upwards in size resolves -% what would otherwise be an error in conversion. For instance: conversion -% from an "int32" type should be stored in a "double" because a "single" -% cannot contain all "int32" values (due to the mantissa). A similar case -% exists when converting from unsigned types to signed types ("uint32" -> -% "int32" should actually return "int64" as it ideal type). -if valueTypeBitSize == idealTypeBitSize ... - && ((isValueTypeUnsigned && ~isTypeUnsigned) ... - || (~isfloat(val) && isTypeFloat)) - idealTypeBitSize = min(64, 2 * idealTypeBitSize); -end - -if isTypeFloat - if 64 == idealTypeBitSize - nearestType = 'double'; - else - nearestType = 'single'; - end -else - if isTypeUnsigned - typePrefix = 'uint'; - else - typePrefix = 'int'; + %CORRECTTYPE + % Will error if type is simply incompatible + % Will throw if casting to primitive type "type" is impossible + + errorId = 'NWB:TypeCorrection:InvalidConversion'; + errorTemplate = sprintf( ... + 'Value of type `%s` cannot be converted to type `%s`:\n %%s', class(val), type ... + ); + + switch type + case 'char' + errorMessage = sprintf(errorTemplate, ... + sprintf('value was not a valid string type. got %s instead', class(val)) ... + ); + assert(isstring(val) || ischar(val) || iscellstr(val), ... + errorId, ... + errorMessage ... 
+ ); + case 'datetime' + isCellString = iscellstr(val) || (iscell(val) && all(cellfun('isclass', val, 'string'))); + isCellDatetime = iscell(val) && all(cellfun('isclass', val, 'datetime')); + isHeterogeneousCell = isCellString || isCellDatetime; + assert(ischar(val) || isdatetime(val) || isstring(val) || isHeterogeneousCell, ... + errorId, sprintf(errorTemplate, 'value is not a timestamp or datetime object')); + + % convert strings to datetimes + if ischar(val) || isstring(val) || isCellString + val = formatDatetime(io.timestamp2datetime(val)); + return; + end + if isdatetime(val) + val = num2cell(val); + end + + % set format depending on default values. + for iDatetime = 1:length(val) + % note, must be a for loop since datetimes with/without timezones cannot be + % concatenated. + val{iDatetime} = formatDatetime(val{iDatetime}); + end + case {'single', 'double', 'int64', 'int32', 'int16', 'int8', 'uint64', ... + 'uint32', 'uint16', 'uint8'} + errorMessage = sprintf(errorTemplate ... + , sprintf('type %s is not numeric or cannot be converted to a numeric value.', class(type)) ... + ); + assert(ischar(val) || iscellstr(val) || isstring(val) || isnumeric(val) ... + , errorId, errorMessage ... + ); + + if ischar(val) || iscellstr(val) || isstring(val) + val = str2double(val); + end + + % find nearest type and convert if necessary. + nearestType = findNearestType(val, type); + if ~strcmp(nearestType, class(val)) + castedValue = cast(val, nearestType); + assert(isequal(castedValue, val), ... + 'NWB:TypeCorrection:PrecisionLossDetected', ... + ['Could not convert data value of type `%s` to type `%s`. ' ... + 'Precision loss detected.'], ... + class(val), type ... + ); + val = castedValue; + end + case 'logical' + val = logical(val); + otherwise % type may refer to an object or even a link + errorMessage = sprintf(errorTemplate ... + , sprintf('value is not instance of type %s. 
Got type %s instead', type, class(val))); + assert(isa(val, type), errorId, errorMessage); end - nearestType = sprintf('%s%d', typePrefix, idealTypeBitSize); -end -end - -function dt = str2dates(strings) -%STR2DATES converts a string array, character matrix, or cell array of -% convertible types to a formatted date vector. Assumes type is one of the -% above. - -if ischar(strings) - % split character matrix by row. - strings = mat2cell(strings, ones(1, size(strings,1))); -elseif isstring(strings) - strings = num2cell(strings); end -datevals = cell(size(strings)); -for i = 1:length(strings) - if isdatetime(strings{i}) - datevals{i} = strings{i}; +function Datetime = formatDatetime(Datetime) + if all(0 == [Datetime.Hour] & 0 == [Datetime.Minute] & 0 == [Datetime.Second]) + formatString = 'yyyy-MM-dd'; + elseif all(cellfun('isempty', {Datetime.TimeZone})) + formatString = 'yyyy-MM-dd''T''HH:mm:ss.SSSSSS'; else - datevals{i} = datetime8601(strtrim(strings{i})); + formatString = 'yyyy-MM-dd''T''HH:mm:ss.SSSSSSZZZZZ'; end -end -dt = [datevals{:}]; -end - -function dt = datetime8601(datestr) -addpath(fullfile(fileparts(which('NwbFile')), 'external_packages', 'datenum8601')); -[~, ~, format] = datenum8601(datestr); -format = format{1}; -has_delimiters = format(1) == '*'; -if has_delimiters - format = format(2:end); + [Datetime.Format] = formatString; end -assert(strncmp(format, 'ymd', 3),... - 'NWB:CheckDType:DateTime:Unsupported8601',... - 'non-ymd formats not supported.'); -separator = format(4); -if separator ~= ' ' - % non-space digits will error when specifying import format - separator = ['''' separator '''']; -end - -has_fractional_sec = isstrprop(format(8:end), 'digit'); -if has_fractional_sec - seconds_precision = str2double(format(8:end)); - if seconds_precision > 9 - warning('NWB:CheckDType:DateTime:LossySeconds',... - ['Potential loss of time data detected. MATLAB fractional seconds '... - 'precision is limited to 1 ns. 
Extra precision will be truncated.']); +function nearestType = findNearestType(val, type) + %FINDNEARESTTYPE given a value of some type. Find the nearest equivalent + %type whose size matches that of the preferred type but can still hold the + %stored value. + + dataLossWarnId = 'NWB:TypeCorrection:DataLoss'; + dataLossWarnMessageFormat = ['Converting value of type `%s` to type ' ... + '`%s` may drop data precision.']; + + if ~isreal(val) + warning(dataLossWarnId, dataLossWarnMessageFormat, ... + class(val), type); + val = real(val); end -end -day_segments = {'yyyy', 'MM', 'dd'}; -time_segments = {'HH', 'mm', 'ss'}; - -if has_delimiters - day_delimiter = '-'; - time_delimiter = ':'; -else - day_delimiter = ''; - time_delimiter = ''; -end - -day_format = strjoin(day_segments, day_delimiter); -time_format = strjoin(time_segments, time_delimiter); -format = [day_format separator time_format]; -if has_fractional_sec - format = sprintf('%s.%s', format, repmat('S', 1, seconds_precision)); -end - -[datestr, timezone] = derive_timezone(datestr); -dt = datetime(datestr,... - 'InputFormat', format,... 
- 'TimeZone', timezone); -end - -function [datestr, timezone] = derive_timezone(datestr) -% one of: -% +-hh:mm -% +-hhmm -% +-hh -% Z - -tzre_pattern = '(?:[+-]\d{2}(?::?\d{2})?|Z)$'; -tzre_match = regexp(datestr, tzre_pattern, 'once'); - -if isempty(tzre_match) - timezone = 'local'; -else - timezone = datestr(tzre_match:end); - if strcmp(timezone, 'Z') - timezone = 'UTC'; + + if strcmp(type, 'numeric') || strcmp(class(val), type) + nearestType = class(val); + return; + end + + isTypeFloat = any(strcmp(type, {'single', 'double'})); + isTypeUnsigned = ~isTypeFloat && startsWith(type, 'u'); + isValueTypeUnsigned = ~isfloat(val) && startsWith(class(val), 'u'); + + valueTypeBitSize = 8 * io.getMatTypeSize(class(val)); + preferredTypeBitSize = 8 * io.getMatTypeSize(type); + idealTypeBitSize = max(valueTypeBitSize, preferredTypeBitSize); + + % In certain classes of conversion, simply scaling upwards in size resolves + % what would otherwise be an error in conversion. For instance: conversion + % from an "int32" type should be stored in a "double" because a "single" + % cannot contain all "int32" values (due to the mantissa). A similar case + % exists when converting from unsigned types to signed types ("uint32" -> + % "int32" should actually return "int64" as it ideal type). + if valueTypeBitSize == idealTypeBitSize ... + && ((isValueTypeUnsigned && ~isTypeUnsigned) ... 
+ || (~isfloat(val) && isTypeFloat)) + idealTypeBitSize = min(64, 2 * idealTypeBitSize); + end + + if isTypeFloat + if 64 == idealTypeBitSize + nearestType = 'double'; + else + nearestType = 'single'; + end + else + if isTypeUnsigned + typePrefix = 'uint'; + else + typePrefix = 'int'; + end + nearestType = sprintf('%s%d', typePrefix, idealTypeBitSize); end - datestr = datestr(1:(tzre_match - 1)); -end end diff --git a/external_packages/datenum8601/datenum8601.m b/external_packages/datenum8601/datenum8601.m deleted file mode 100644 index 49e3a98c..00000000 --- a/external_packages/datenum8601/datenum8601.m +++ /dev/null @@ -1,245 +0,0 @@ -function [DtN,Spl,TkC] = datenum8601(Str,Tok) -% Convert an ISO 8601 formatted Date String (timestamp) to a Serial Date Number. -% -% (c) 2015 Stephen Cobeldick -% -% ### Function ### -% -% Syntax: -% DtN = datenum8601(Str) -% DtN = datenum8601(Str,Tok) -% [DtN,Spl,TkC] = datenum8601(...) -% -% By default the function automatically detects all ISO 8601 timestamp/s in -% the string, or use a token to restrict detection to only one particular style. -% -% The ISO 8601 timestamp style options are: -% - Date in calendar, ordinal or week-numbering notation. -% - Basic or extended format. -% - Choice of date-time separator character ( @T_). -% - Full or lower precision (trailing units omitted) -% - Decimal fraction of the trailing unit. -% These style options are illustrated in the tables below. -% -% The function returns the Serial Date Numbers of the date and time given -% by the ISO 8601 style timestamp/s, the input string parts that are split -% by the detected timestamps (i.e. the substrings not part of any ISO 8601 -% timestamp), and string token/s that define the detected timestamp style/s. -% -% Note 1: Calls undocumented MATLAB function "datenummx". -% Note 2: Unspecified month/date/week/day timestamp values default to one (1). -% Note 3: Unspecified hour/minute/second timestamp values default to zero (0). 
-% Note 4: Auto-detection mode also parses mixed basic/extended timestamps. -% -% See also DATESTR8601 DATEROUND CLOCK NOW DATENUM DATEVEC DATESTR NATSORT NATSORTROWS NATSORTFILES -% -% ### Examples ### -% -% Examples use the date+time described by the vector [1999,1,3,15,6,48.0568]. -% -% datenum8601('1999-01-03 15:06:48.0568') -% ans = 730123.62972287962 -% -% datenum8601('1999003T150648.0568') -% ans = 730123.62972287962 -% -% datenum8601('1998W537_150648.0568') -% ans = 730123.62972287962 -% -% [DtN,Spl,TkC] = datenum8601('A19990103B1999-003C1998-W53-7D') -% DtN = [730123,730123,730123] -% Spl = {'A','B','C','D'} -% TkC = {'ymd','*yn','*YWD'} -% -% [DtN,Spl,TkC] = datenum8601('1999-003T15') -% DtN = 730123.6250 -% Spl = {'',''} -% TkC = {'*ynTH'} -% -% [DtN,Spl,TkC] = datenum8601('1999-01-03T15','*ymd') -% DtN = 730123.0000 -% Spl = {'','T15'} -% TkC = {'*ymd'} -% -% ### ISO 8601 Timestamps ### -% -% The token consists of one letter for each of the consecutive date/time -% units in the timestamp, thus it defines the date notation (calendar, -% ordinal or week-date) and selects either basic or extended format: -% -% Input | Basic Format | Extended Format (token prefix '*') -% Date | In/Out | Input Timestamp | In/Out | Input Timestamp -% Notation:| : | Example: | : | Example: -% =========|========|=================|=========|=========================== -% Calendar |'ymdHMS'|'19990103T150648'|'*ymdHMS'|'1999-01-03T15:06:48' -% ---------|--------|-----------------|---------|--------------------------- -% Ordinal |'ynHMS' |'1999003T150648' |'*ynHMS' |'1999-003T15:06:48' -% ---------|--------|-----------------|---------|--------------------------- -% Week |'YWDHMS'|'1998W537T150648'|'*YWDHMS'|'1998-W53-7T15:06:48' -% ---------|--------|-----------------|---------|--------------------------- -% -% Options for reduced precision timestamps, non-standard date-time separator -% character, and the addition of a decimal fraction of the trailing unit: -% -% Omit trailing 
units (reduced precision), eg: | Output->Vector: -% =========|========|=================|=========|=================|===================== -% |'Y' |'1999W' |'*Y' |'1999-W' |[1999,1,4,0,0,0] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% |'ymdH' |'19990103T15' |'*ymdH' |'1999-01-03T15' |[1999,1,3,15,0,0] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% Select the date-time separator character (one of ' ','@','T','_'), eg: -% =========|========|=================|=========|=================|===================== -% |'yn_HM' |'1999003_1506' |'*yn_HM' |'1999-003_15:06' |[1999,1,3,15,6,0] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% |'YWD@H' |'1998W537@15' |'*YWD@H' |'1998-W53-7@15' |[1999,1,3,15,0,0] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% Decimal fraction of trailing date/time value, eg: -% =========|========|=================|=========|=================|===================== -% |'ynH3' |'1999003T15.113' |'*ynH3' |'1999-003T15.113'|[1999,1,3,15,6,46.80] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% |'YWD4' |'1998W537.6297' |'*YWD4' |'1998-W53-7.6297'|[1999,1,3,15,6,46.08] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% |'y10' |'1999.0072047202'|'*y10' |'1999.0072047202'|[1999,1,3,15,6,48.06] -% ---------|--------|-----------------|---------|-----------------|--------------------- -% -% Note 5: This function does not check for ISO 8601 compliance: user beware! -% Note 6: Date-time separator character must be one of ' ','@','T','_'. -% Note 7: Date notations cannot be combined: note upper/lower case characters. -% -% ### Input & Output Arguments ### -% -% Inputs (*default): -% Str = DateString, possibly containing one or more ISO 8601 dates/timestamps. 
-% Tok = String, token to select the required date notation and format (*[]=any). -% -% Outputs: -% DtN = NumericVector of Serial Date Numbers, one from each timestamp in input . -% Spl = CellOfStrings, the strings before, between and after the detected timestamps. -% TkC = CellOfStrings, tokens of each timestamp notation and format (see tables). -% -% [DtN,Spl,TkC] = datenum8601(Str,*Tok) - -% Define "regexp" match string: -if nargin<2 || isempty(Tok) - % Automagically detect timestamp style. - MtE = [... - '(\d{4})',... % year - '((-(?=(\d{2,3}|W)))?)',... % - - '(W?)',... % W - '(?(3)(\d{2})?|(\d{2}(?=($|\D|\d{2})))?)',... % week/month - '(?(4)(-(?=(?(3)\d|\d{2})))?)',... % - - '(?(4)(?(3)\d|\d{2})?|(\d{3})?)',... % day of week/month/year - '(?(6)([ @T_](?=\d{2}))?)',... % date-time separator character - '(?(7)(\d{2})?)',... % hour - '(?(8)(:(?=\d{2}))?)',... % : - '(?(8)(\d{2})?)',... % minute - '(?(10)(:(?=\d{2}))?)',... % : - '(?(10)(\d{2})?)',... % second - '((\.\d+)?)']; % trailing unit decimal fraction - % (Note: allows a mix of basic/extended formats) -else - % User requests a specific timestamp style. - assert(ischar(Tok)&&isrow(Tok),'Second input must be a string.') - TkU = regexp(Tok,'(^\*?)([ymdnYWD]*)([ @T_]?)([HMS]*)(\d*$)','tokens','once'); - assert(~isempty(TkU),'Second input is not supported: ''%s''',Tok) - MtE = [TkU{2},TkU{4}]; - TkL = numel(MtE); - Ntn = find(strncmp(MtE,{'ymdHMS','ynHMS','YWDHMS'},TkL),1,'first'); - assert(~isempty(Ntn),'Second input is not supported: ''%s''',Tok) - MtE = dn8601Usr(TkU,TkL,Ntn); -end -% -assert(ischar(Str)&&size(Str,1)<2,'First input must be a string.') -% -% Extract timestamp tokens, return split strings: -[TkC,Spl] = regexp(Str,MtE,'tokens','split'); -% -[DtN,TkC] = cellfun(@dn8601Main,TkC); -% -end -%----------------------------------------------------------------------END:datenum8601 -function [DtN,Tok] = dn8601Main(TkC) -% Convert detected substrings into serial date number, create string token. 
-% -% Lengths of matched tokens: -TkL = cellfun('length',TkC); -% Preallocate Date Vector: -DtV = [1,1,1,0,0,0]; -% -% Create token: -Ext = '*'; -Sep = [TkC{7},'HMS']; -TkX = {['ymd',Sep],['y*n',Sep],['YWD',Sep]}; -Ntn = 1+(TkL(6)==3)+2*TkL(3); -Tok = [Ext(1:+any(TkL([2,5,9,11])==1)),TkX{Ntn}(01 - if Ntn==2&&m==2 % Month (special case not converted by "datenummx"): - DtV(3) = 1+sscanf(TkC{13},'%f')*(datenummx(DtV+[0,1,0,0,0,0])-datenummx(DtV)); - else % All other date or time values (are converted by "datenummx"): - DtV(m) = DtV(m)+sscanf(TkC{13},'%f'); - end - Tok = {[Tok,sprintf('%.0f',TkL(13)-1)]}; -else - Tok = {Tok}; -end -% -% Week-numbering vector to ordinal vector: -if Ntn==3 - DtV(3) = DtV(3)+7*DtV(2)-4-mod(datenummx([DtV(1),1,1]),7); - DtV(2) = 1; -end -% Convert out-of-range Date Vector to Serial Date Number: -DtN = datenummx(DtV) - 31*(0==DtV(2)); -% (Month zero is a special case not converted by "datenummx") -% -end -%----------------------------------------------------------------------END:dn8601Main -function MtE = dn8601Usr(TkU,TkL,Ntn) -% Create "regexp" string from user input token. 
-% -% Decimal fraction: -if isempty(TkU{5}) - MtE{13} = '()'; -else - MtE{13} = ['(\.\d{',TkU{5},'})']; -end -% Date-time separator character: -if isempty(TkU{3}) - MtE{7} = '(T)'; -else - MtE{7} = ['(',TkU{3},')']; -end -% Year and time tokens (year, hour, minute, second): -MtE([1,8,10,12]) = {'(\d{4})','(\d{2})','(\d{2})','(\d{2})'}; -% Format tokens: -if isempty(TkU{1}) % Basic - MtE([2,5,9,11]) = {'()','()','()','()'}; -else % Extended - MtE([2,5,9,11]) = {'(-)','(-)','(:)','(:)'}; -end -% Date tokens: -switch Ntn - case 1 % Calendar - Idx = [2,5,7,9,11,13]; - MtE([3,4,6]) = {'()', '(\d{2})','(\d{2})'}; - case 2 % Ordinal - Idx = [2,7,9,11,13]; - MtE([3,4,5,6]) = {'()','()','()','(\d{3})'}; - case 3 % Week - Idx = [2,5,7,9,11,13]; - MtE([3,4,6]) = {'(W)','(\d{2})','(\d{1})'}; -end -% -% Concatenate tokens into "regexp" match token: -MtE(Idx(TkL):12) = {'()'}; -MtE = [MtE{:}]; -% -end -%----------------------------------------------------------------------END:dn8601Usr \ No newline at end of file diff --git a/external_packages/datenum8601/license.txt b/external_packages/datenum8601/license.txt deleted file mode 100644 index f7dcb1ce..00000000 --- a/external_packages/datenum8601/license.txt +++ /dev/null @@ -1,24 +0,0 @@ -Copyright (c) 2015, Stephen Cobeldick -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the distribution - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE.