datalad · candleindark · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/datalad_registry/tests/test_overview.py b/datalad_registry/tests/test_overview.py
@@ -165,11 +165,11 @@ def test_sorting(
             ("datalad AND handbook", ["https://handbook.datalad.org"]),
         ],
     )
-    def test_search_query(
+    def test_search_with_valid_query(
         self, search_query: Optional[str], expected_results: list[str], flask_client
     ):
         """
-        Test for the filtering of dataset URLs in the overview page
+        Test searching with a valid query
         """
 
         resp = flask_client.get("/overview/", query_string={"query": search_query})
@@ -187,9 +187,9 @@ def test_search_query(
             "unknown_field:example",
         ],
     )
-    def test_search_query_error(self, search_query: Optional[str], flask_client):
+    def test_search_with_invalid_query(self, search_query: Optional[str], flask_client):
         """
-        Test for the filtering of dataset URLs in the overview page
+        Test searching with an invalid query
         """
 
         resp = flask_client.get("/overview/", query_string={"query": search_query})

diff --git a/datalad_registry/tests/test_search.py b/datalad_registry/tests/test_search.py
@@ -7,24 +7,6 @@
 from ..search import parse_query
 
 
-@pytest.mark.parametrize(
-    "query, exc, err",
-    [
-        ("unknown_field:example", ValueError, None),
-        # Lark masks exceptions. We did not provide dedicated ones for all
-        # of them, but let's test that error message as expected
-        ("ds_id:=example", VisitError, "Operation = is not implemented"),
-        # r'(haxby or halchenko) AND metadata:BIDSmetadata[bids_dataset,metalad_core]:'
-        # r'"BIDSVersion\": \"v"',
-    ],
-)
-def test_search_errors(query, exc, err):
-    with pytest.raises(exc) as ce:
-        parse_query(query)
-    if err:
-        assert err in str(ce.value)
-
-
 @pytest.fixture
 def populate_with_url_metadata_for_search(
     populate_with_dataset_urls,  # noqa: U100 (unused argument)
@@ -69,72 +51,112 @@ def populate_with_url_metadata_for_search(
         db.session.commit()
 
 
-@pytest.mark.usefixtures("populate_with_url_metadata_for_search")
-@pytest.mark.parametrize(
-    "query, expected",
-    [
-        # based purely on url field
-        ("example", [1]),
-        ("example OR handbook", [1, 3]),
-        # case insensitive
-        ("Example OR handBook", [1, 3]),
-        ("example AND handbook", []),
-        ("example handbook", []),  # implicit AND
-        ("datalad OR handbook", [2, 3]),
-        ("datalad AND handbook", [3]),
-        ("datalad", [2, 3]),
-        ("handbook", [3]),
-        ("NOT url:handbook", [1, 2, 4]),
-        ("NOT metadata:handbook", [1, 2, 3, 4]),
-        ("datalad AND NOT url:handbook", [2]),
-        ("datalad AND (NOT url:handbook)", [2]),
-        ("datalad (NOT url:handbook)", [2]),  # implicit AND
-        ("datalad AND NOT metadata:handbook", [2, 3]),
-        # we get empty result ATM which fails the test. TODO - figure it out/fix!
-        ("NOT handbook", [1, 2, 4]),
-        ("datalad AND (NOT handbook)", [2]),
-        ("handbook datalad", [3]),  # should be identical result to above AND
-        ("handbook url:datalad", [3]),
-        ("handbook url:?datalad", [3]),  # identical to above
-        ("handbook ds_id:datalad", []),
-        ("handbook OR ds_id:datalad", [3]),
-        ("url:handbook OR ds_id:datalad", [3]),
-        ("url:handbook OR ds_id:844c", [1, 2, 3]),
-        ("(url:handbook OR metadata[metalad_core]:meta1value) AND ds_id:844c", [2, 3]),
-        (
-            "(url:?handbook OR metadata[metalad_core]:?meta1value) AND ds_id:?844c",
-            [2, 3],
-        ),
-        ("(url:handbook OR metadata[metalad_core]:meta3value) AND ds_id:844C", [1, 3]),
-        ("(url:handbook OR metadata[metalad_core]:value) AND ds_id:844c", [1, 2, 3]),
-        (
-            "(url:handbook OR metadata[metalad_core]:value) ds_id:844c",
-            [1, 2, 3],
-        ),  # implicit AND
-        ("meta2value", [2]),
-        # search within a JSON record. Space between key and value would matter
-        (r'metadata:"meta1\": \"meta1value\""', [2]),
-        ('metadata:"value"', [1, 2]),
-        ('metadata[metalad_studyminimeta]:"value"', []),
-        # ATM only exact match for extractor
-        ('metadata[metalad]:"value"', []),
-        ('metadata[metalad_core]:"value"', [1, 2]),
-        # OR among multiple listed, ok to have unknown
-        ('metadata[metalad_core,metalad_studyminimeta,unknown]:"value"', [1, 2]),
-        # Prototypical query for which we do not have full support yet, e.g.
-        # regex matching :~
-        #  (r"""((jim AND NOT haxby AND "important\" paper") OR ds_id:~"^000[3-9]..$"
-        #   OR url:"example.com") AND metadata:non AND metadata[ex1,ex2]:"specific data"
-        #   AND metadata[extractor2]:data""", []),
-        # Find datasets with the last
-        #  (metadata[bids_dataset][Authors][-1]:haxby ...)
-    ],
-)
-def test_search_cases(flask_app, query, expected):
-    r = parse_query(query)
-    # print(f"QUERY {query}: {r}")
-    with flask_app.app_context():
-        result = db.session.execute(select(RepoUrl).filter(r))
-        hits = [_.id for _ in result.scalars().all()]
-        # print(expected, hits)
-        assert hits == expected
+class TestSearch:
+    """
+    Tests for the search functionality
+    """
+
+    @pytest.mark.parametrize(
+        "query, exc, err",
+        [
+            ("unknown_field:example", ValueError, None),
+            # Lark masks exceptions. We did not provide dedicated ones for all
+            # of them, but let's test that the error message is as expected
+            ("ds_id:=example", VisitError, "Operation = is not implemented"),
+            # r'(haxby or halchenko) AND '
+            # r'metadata:BIDSmetadata[bids_dataset,metalad_core]:'
+            # r'"BIDSVersion\": \"v"',
+        ],
+    )
+    def test_with_invalid_query(self, query, exc, err):
+        """
+        Test the search functionality in handling invalid queries
+        """
+        with pytest.raises(exc) as ce:
+            parse_query(query)
+        if err:
+            assert err in str(ce.value)
+
+    @pytest.mark.usefixtures("populate_with_url_metadata_for_search")
+    @pytest.mark.parametrize(
+        "query, expected",
+        [
+            # based purely on url field
+            ("example", [1]),
+            ("example OR handbook", [1, 3]),
+            # case insensitive
+            ("Example OR handBook", [1, 3]),
+            ("example AND handbook", []),
+            ("example handbook", []),  # implicit AND
+            ("datalad OR handbook", [2, 3]),
+            ("datalad AND handbook", [3]),
+            ("datalad", [2, 3]),
+            ("handbook", [3]),
+            ("NOT url:handbook", [1, 2, 4]),
+            ("NOT metadata:handbook", [1, 2, 3, 4]),
+            ("datalad AND NOT url:handbook", [2]),
+            ("datalad AND (NOT url:handbook)", [2]),
+            ("datalad (NOT url:handbook)", [2]),  # implicit AND
+            ("datalad AND NOT metadata:handbook", [2, 3]),
+            # we get empty result ATM which fails the test. TODO - figure it out/fix!
+            ("NOT handbook", [1, 2, 4]),
+            ("datalad AND (NOT handbook)", [2]),
+            ("handbook datalad", [3]),  # same as "datalad AND handbook"
+            ("handbook url:datalad", [3]),
+            ("handbook url:?datalad", [3]),  # identical to above
+            ("handbook ds_id:datalad", []),
+            ("handbook OR ds_id:datalad", [3]),
+            ("url:handbook OR ds_id:datalad", [3]),
+            ("url:handbook OR ds_id:844c", [1, 2, 3]),
+            (
+                "(url:handbook OR metadata[metalad_core]:meta1value) AND ds_id:844c",
+                [2, 3],
+            ),
+            (
+                "(url:?handbook OR metadata[metalad_core]:?meta1value) AND ds_id:?844c",
+                [2, 3],
+            ),
+            (
+                "(url:handbook OR metadata[metalad_core]:meta3value) AND ds_id:844C",
+                [1, 3],
+            ),
+            (
+                "(url:handbook OR metadata[metalad_core]:value) AND ds_id:844c",
+                [1, 2, 3],
+            ),
+            (
+                "(url:handbook OR metadata[metalad_core]:value) ds_id:844c",
+                [1, 2, 3],
+            ),  # implicit AND
+            ("meta2value", [2]),
+            # search within a JSON record; the space between key and value matters
+            (r'metadata:"meta1\": \"meta1value\""', [2]),
+            ('metadata:"value"', [1, 2]),
+            ('metadata[metalad_studyminimeta]:"value"', []),
+            # ATM only exact match for extractor
+            ('metadata[metalad]:"value"', []),
+            ('metadata[metalad_core]:"value"', [1, 2]),
+            # OR among multiple listed, ok to have unknown
+            ('metadata[metalad_core,metalad_studyminimeta,unknown]:"value"', [1, 2]),
+            # Prototypical query for which we do not have full support yet, e.g.
+            # regex matching :~
+            #  (r"""((jim AND NOT haxby AND "important\" paper") OR
+            #   ds_id:~"^000[3-9]..$"
+            #   OR url:"example.com") AND metadata:non
+            #   AND metadata[ex1,ex2]:"specific data"
+            #   AND metadata[extractor2]:data""", []),
+            # Find datasets by the last listed author, e.g.
+            #  (metadata[bids_dataset][Authors][-1]:haxby ...)
+        ],
+    )
+    def test_with_valid_query(self, flask_app, query, expected):
+        """
+        Test the search functionality in handling valid queries
+        """
+        r = parse_query(query)
+        # print(f"QUERY {query}: {r}")
+        with flask_app.app_context():
+            result = db.session.execute(select(RepoUrl).filter(r))
+            hits = [_.id for _ in result.scalars().all()]
+            # print(expected, hits)
+            assert hits == expected