Skip to content

Commit

Permalink
Add error handler and update test script
Browse files Browse the repository at this point in the history
  • Loading branch information
pavlis committed Jan 7, 2025
1 parent 3f22ab6 commit 873beeb
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 8 deletions.
27 changes: 19 additions & 8 deletions python/mspasspy/db/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2580,7 +2580,10 @@ class OriginTimeMatcher(DataFrameCacheMatcher):
this list will need to be changed to remove _id as it in that context
no ObjectID would normally be defined. Be warned, however, that if
used with a normalize function the _id may be required to match a
"source_id" cross reference in a seismic data object.
"source_id" cross reference in a seismic data object. Also note
that the list must contain the key defined by the related
argument "source_time_key" as that is used to match times in
the source data with data start times.
:type attributes_to_load: list of string defining keys in collection
documents
Expand Down Expand Up @@ -2612,17 +2615,19 @@ class OriginTimeMatcher(DataFrameCacheMatcher):
:type require_unique_match: boolean
:param data_time_key: data object Metadata key used to fetch
time for testing as alternative to data start time. If set None
(default) the test will use the start time of an atomic data object
for the time test. If nonzero it is assumed to be a string used
to fetch a time from the data's Metadata container. That is the
best way to run this matcher on Ensembles.
time for testing as alternative to data start time. If set None
(default) the test will use the start time of an atomic data object
for the time test. If not None it is assumed to be a string used
to fetch a time from the data's Metadata container. That is the
best way to run this matcher on Ensembles.
:type data_time_key: string
:param source_time_key: dataframe column name to use as source
origin time field. Default is "time"
origin time field. Default is "time". This key must match
a key in the attributes_to_load list or the constructor will
throw an exception.
:type source_time_key: string Can also be a None type which
is causes the internal value to be set to "time"
causes the internal value to be set to "time"
"""

def __init__(
Expand Down Expand Up @@ -2657,6 +2662,12 @@ def __init__(
self.source_time_key = "time"
else:
self.source_time_key = source_time_key
if self.source_time_key not in attributes_to_load:
message = "OriginTimeMatcher constructor: "
message += "key for fetching origin time=" + self.source_time_key
message += " is not in attributes_to_load list\n"
message += "Required for matching with waveform start times"
raise MsPASSError(message,ErrorSeverity.Fatal)

def subset(self, mspass_object) -> pd.DataFrame:
"""
Expand Down
35 changes: 35 additions & 0 deletions python/tests/db/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,41 @@ def test_OriginTimeMatcher_find_one(self):
ts["testtime"] = 9999.99
db_retdoc = db_matcher.find_one(ts)
assert db_retdoc[0] is None

def test_OriginTimeMatcher_find_doc(self):
    """
    Check OriginTimeMatcher.find_doc with one document expected to
    match and one expected to fail.

    Nearly identical to the "find_one" test immediately above, but
    written for the find_doc method, which is independently implemented.
    There is only a dataframe version of that method though.
    TODO: this test does not validate the multiple match algorithm
    returning minimum time offset as unique match.  The find_one test
    needs a similar test.
    """
    matcher = OriginTimeMatcher(self.db, source_time_key="time", t0offset=522.0)

    query = {"_id": ObjectId("62812b08178bf05fe5787d82")}
    wf_doc = self.db.wf_miniseed.find_one(query)
    datum = self.db.read_data(wf_doc, collection="wf_miniseed")

    # find_doc is called with plain dicts here, so convert a copy of the
    # datum into two separate dicts: one left untouched, one altered below
    ts_copy = TimeSeries(datum)
    matching_doc = dict(ts_copy)
    altered_doc = dict(ts_copy)

    # a successful match must hand back a non-empty dict
    found = matcher.find_doc(matching_doc)
    assert found
    assert isinstance(found, dict)

    # overwrite "time" with a value expected to match nothing; the test
    # requires a silent None return rather than an exception
    altered_doc["time"] = 99999.99
    missed = matcher.find_doc(altered_doc)
    assert missed is None


def test_OriginTimeMatcher_normalize(self):
# t0offset value needed to work with test data set. See above
Expand Down

0 comments on commit 873beeb

Please sign in to comment.