Add error handler and update test script

mspass-team · Jan 7, 2025 · 873beeb · 873beeb
1 parent 3f22ab6
commit 873beeb
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 8 deletions.
diff --git a/python/mspasspy/db/normalize.py b/python/mspasspy/db/normalize.py
@@ -2580,7 +2580,10 @@ class OriginTimeMatcher(DataFrameCacheMatcher):
       this list will need to be changed to remove _id as it in that context
       no ObjectID would normally be defined.  Be warned, however, that if
       used with a normalize function the _id may be required to match a
-      "source_id" cross reference in a seismic data object.
+      "source_id" cross reference in a seismic data object.  Also note 
+      that the list must contain the key defined by the related 
+      argument "source_time_key" as that is used to match times in 
+      the source data with data start times.   
     :type attributes_to_load:  list of string defining keys in collection
       documents
 
@@ -2612,17 +2615,19 @@ class OriginTimeMatcher(DataFrameCacheMatcher):
     :type require_unique_match:  boolean
 
     :param data_time_key:  data object Metadata key used to fetch
-    time for testing as alternative to data start time.  If set None
-    (default) the test will use the start time of an atomic data object
-    for the time test.  If nonzero it is assumed to be a string used
-    to fetch a time from the data's Metadata container.  That is the
-    best way to run this matcher on Ensembles.
+      time for testing as alternative to data start time.  If set None
+      (default) the test will use the start time of an atomic data object
+      for the time test.  If not None it is assumed to be a string used
+      to fetch a time from the data's Metadata container.  That is the
+      best way to run this matcher on Ensembles.
     :type data_time_key:  string
 
     :param source_time_key:  dataframe column name to use as source
-    origin time field.  Default is "time"
+      origin time field.  Default is "time".   This key must match 
+      a key in the attributes_to_load list or the constructor will 
+      throw an exception.
     :type source_time_key:  string  Can also be a None type which
-    is causes the internal value to be set to "time"
+      causes the internal value to be set to "time"
     """
 
     def __init__(
@@ -2657,6 +2662,12 @@ def __init__(
             self.source_time_key = "time"
         else:
             self.source_time_key = source_time_key
+        if self.source_time_key not in attributes_to_load:
+            message = "OriginTimeMatcher constructor:  "
+            message += "key for fetching origin time=" + self.source_time_key
+            message += " is not in attributes_to_load list\n"
+            message += "Required for matching with waveform start times"
+            raise MsPASSError(message,ErrorSeverity.Fatal)
 
     def subset(self, mspass_object) -> pd.DataFrame:
         """ 

diff --git a/python/tests/db/test_normalize.py b/python/tests/db/test_normalize.py
@@ -732,6 +732,41 @@ def test_OriginTimeMatcher_find_one(self):
         ts["testtime"] = 9999.99
         db_retdoc = db_matcher.find_one(ts)
         assert db_retdoc[0] is None
+
+    def test_OriginTimeMatcher_find_doc(self):
+        """
+        Nearly identical code to "find_one" version immediately above but 
+        for the find_doc method that is independently implemented.  
+        There is only a dataframe version of that method though.
+        
+        TODO:  this test does not validate multiple match algorithm 
+        returning minimum time offset as unique match.  find_one test needs 
+        a similar test.
+        """
+        cached_matcher = OriginTimeMatcher(
+            self.db, source_time_key="time", t0offset=522.0
+        )
+
+        orig_doc = self.db.wf_miniseed.find_one(
+            {"_id": ObjectId("62812b08178bf05fe5787d82")}
+        )
+        orig_ts = self.db.read_data(orig_doc, collection="wf_miniseed")
+
+        #   get document for TimeSeries
+        ts_1 = TimeSeries(orig_ts)
+        doc1 = dict(ts_1)
+        doc2 = dict(ts_1)
+
+        retdoc = cached_matcher.find_doc(doc1)
+        # A failed find_doc returns None, so check that a result came back
+        assert retdoc
+        assert isinstance(retdoc,dict)
+
+        # test failure with unmatched time - should silently return None
+        doc2["time"] = 99999.99
+        retdoc = cached_matcher.find_doc(doc2)
+        assert retdoc is None
+
 
     def test_OriginTimeMatcher_normalize(self):
         # t0offset value needed to work with test data set.  See above