From f2ee385fc4aae170aa6237207742196d57e07be3 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 28 Feb 2024 10:46:01 -0800 Subject: [PATCH 1/6] Rename tests for the overview page These names are more informative. Additionally, doc strings for these tests are also updated. --- datalad_registry/tests/test_overview.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datalad_registry/tests/test_overview.py b/datalad_registry/tests/test_overview.py index b2d8dd92..70d42294 100644 --- a/datalad_registry/tests/test_overview.py +++ b/datalad_registry/tests/test_overview.py @@ -165,11 +165,11 @@ def test_sorting( ("datalad AND handbook", ["https://handbook.datalad.org"]), ], ) - def test_search_query( + def test_search_with_valid_query( self, search_query: Optional[str], expected_results: list[str], flask_client ): """ - Test for the filtering of dataset URLs in the overview page + Test searching with a valid query """ resp = flask_client.get("/overview/", query_string={"query": search_query}) @@ -187,9 +187,9 @@ def test_search_query( "unknown_field:example", ], ) - def test_search_query_error(self, search_query: Optional[str], flask_client): + def test_search_with_invalid_query(self, search_query: Optional[str], flask_client): """ - Test for the filtering of dataset URLs in the overview page + Test searching with an invalid query """ resp = flask_client.get("/overview/", query_string={"query": search_query}) From dfaa32748742ff065f11b7bb1a66608880b9411c Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 28 Feb 2024 11:03:50 -0800 Subject: [PATCH 2/6] Group tests for the search function into a class --- datalad_registry/tests/test_search.py | 186 ++++++++++++++------------ 1 file changed, 99 insertions(+), 87 deletions(-) diff --git a/datalad_registry/tests/test_search.py b/datalad_registry/tests/test_search.py index 6d29a1b1..bda3dda9 100644 --- a/datalad_registry/tests/test_search.py +++ b/datalad_registry/tests/test_search.py @@ -7,24 +7,6 @@ from ..search import parse_query -@pytest.mark.parametrize( - "query, exc, err", - [ - ("unknown_field:example", ValueError, None), - # Lark masks exceptions. We did not provide dedicated ones for all - # of them, but let's test that error message as expected - ("ds_id:=example", VisitError, "Operation = is not implemented"), - # r'(haxby or halchenko) AND metadata:BIDSmetadata[bids_dataset,metalad_core]:' - # r'"BIDSVersion\": \"v"', - ], -) -def test_search_errors(query, exc, err): - with pytest.raises(exc) as ce: - parse_query(query) - if err: - assert err in str(ce.value) - - @pytest.fixture def populate_with_url_metadata_for_search( populate_with_dataset_urls, # noqa: U100 (unused argument) @@ -69,72 +51,102 @@ def populate_with_url_metadata_for_search( db.session.commit() -@pytest.mark.usefixtures("populate_with_url_metadata_for_search") -@pytest.mark.parametrize( - "query, expected", - [ - # based purely on url field - ("example", [1]), - ("example OR handbook", [1, 3]), - # case insensitive - ("Example OR handBook", [1, 3]), - ("example AND handbook", []), - ("example handbook", []), # implicit AND - ("datalad OR handbook", [2, 3]), - ("datalad AND handbook", [3]), - ("datalad", [2, 3]), - ("handbook", [3]), - ("NOT url:handbook", [1, 2, 4]), - ("NOT metadata:handbook", [1, 2, 3, 4]), - ("datalad AND NOT url:handbook", [2]), - ("datalad AND (NOT url:handbook)", [2]), - ("datalad (NOT url:handbook)", [2]), # implicit AND - ("datalad AND NOT metadata:handbook", [2, 3]), - # we get empty result ATM which fails the test. TODO - figure it out/fix! - ("NOT handbook", [1, 2, 4]), - ("datalad AND (NOT handbook)", [2]), - ("handbook datalad", [3]), # should be identical result to above AND - ("handbook url:datalad", [3]), - ("handbook url:?datalad", [3]), # identical to above - ("handbook ds_id:datalad", []), - ("handbook OR ds_id:datalad", [3]), - ("url:handbook OR ds_id:datalad", [3]), - ("url:handbook OR ds_id:844c", [1, 2, 3]), - ("(url:handbook OR metadata[metalad_core]:meta1value) AND ds_id:844c", [2, 3]), - ( - "(url:?handbook OR metadata[metalad_core]:?meta1value) AND ds_id:?844c", - [2, 3], - ), - ("(url:handbook OR metadata[metalad_core]:meta3value) AND ds_id:844C", [1, 3]), - ("(url:handbook OR metadata[metalad_core]:value) AND ds_id:844c", [1, 2, 3]), - ( - "(url:handbook OR metadata[metalad_core]:value) ds_id:844c", - [1, 2, 3], - ), # implicit AND - ("meta2value", [2]), - # search within a JSON record. Space between key and value would matter - (r'metadata:"meta1\": \"meta1value\""', [2]), - ('metadata:"value"', [1, 2]), - ('metadata[metalad_studyminimeta]:"value"', []), - # ATM only exact match for extractor - ('metadata[metalad]:"value"', []), - ('metadata[metalad_core]:"value"', [1, 2]), - # OR among multiple listed, ok to have unknown - ('metadata[metalad_core,metalad_studyminimeta,unknown]:"value"', [1, 2]), - # Prototypical query for which we do not have full support yet, e.g. - # regex matching :~ - # (r"""((jim AND NOT haxby AND "important\" paper") OR ds_id:~"^000[3-9]..$" - # OR url:"example.com") AND metadata:non AND metadata[ex1,ex2]:"specific data" - # AND metadata[extractor2]:data""", []), - # Find datasets with the last - # (metadata[bids_dataset][Authors][-1]:haxby ...) - ], -) -def test_search_cases(flask_app, query, expected): - r = parse_query(query) - # print(f"QUERY {query}: {r}") - with flask_app.app_context(): - result = db.session.execute(select(RepoUrl).filter(r)) - hits = [_.id for _ in result.scalars().all()] - # print(expected, hits) - assert hits == expected +class TestSearch: + @pytest.mark.parametrize( + "query, exc, err", + [ + ("unknown_field:example", ValueError, None), + # Lark masks exceptions. We did not provide dedicated ones for all + # of them, but let's test that error message as expected + ("ds_id:=example", VisitError, "Operation = is not implemented"), + # r'(haxby or halchenko) AND ' + # r'metadata:BIDSmetadata[bids_dataset,metalad_core]:' + # r'"BIDSVersion\": \"v"', + ], + ) + def test_search_errors(self, query, exc, err): + with pytest.raises(exc) as ce: + parse_query(query) + if err: + assert err in str(ce.value) + + @pytest.mark.usefixtures("populate_with_url_metadata_for_search") + @pytest.mark.parametrize( + "query, expected", + [ + # based purely on url field + ("example", [1]), + ("example OR handbook", [1, 3]), + # case insensitive + ("Example OR handBook", [1, 3]), + ("example AND handbook", []), + ("example handbook", []), # implicit AND + ("datalad OR handbook", [2, 3]), + ("datalad AND handbook", [3]), + ("datalad", [2, 3]), + ("handbook", [3]), + ("NOT url:handbook", [1, 2, 4]), + ("NOT metadata:handbook", [1, 2, 3, 4]), + ("datalad AND NOT url:handbook", [2]), + ("datalad AND (NOT url:handbook)", [2]), + ("datalad (NOT url:handbook)", [2]), # implicit AND + ("datalad AND NOT metadata:handbook", [2, 3]), + # we get empty result ATM which fails the test. TODO - figure it out/fix! + ("NOT handbook", [1, 2, 4]), + ("datalad AND (NOT handbook)", [2]), + ("handbook datalad", [3]), # should be identical result to above AND + ("handbook url:datalad", [3]), + ("handbook url:?datalad", [3]), # identical to above + ("handbook ds_id:datalad", []), + ("handbook OR ds_id:datalad", [3]), + ("url:handbook OR ds_id:datalad", [3]), + ("url:handbook OR ds_id:844c", [1, 2, 3]), + ( + "(url:handbook OR metadata[metalad_core]:meta1value) AND ds_id:844c", + [2, 3], + ), + ( + "(url:?handbook OR metadata[metalad_core]:?meta1value) AND ds_id:?844c", + [2, 3], + ), + ( + "(url:handbook OR metadata[metalad_core]:meta3value) AND ds_id:844C", + [1, 3], + ), + ( + "(url:handbook OR metadata[metalad_core]:value) AND ds_id:844c", + [1, 2, 3], + ), + ( + "(url:handbook OR metadata[metalad_core]:value) ds_id:844c", + [1, 2, 3], + ), # implicit AND + ("meta2value", [2]), + # search within a JSON record. Space between key and value would matter + (r'metadata:"meta1\": \"meta1value\""', [2]), + ('metadata:"value"', [1, 2]), + ('metadata[metalad_studyminimeta]:"value"', []), + # ATM only exact match for extractor + ('metadata[metalad]:"value"', []), + ('metadata[metalad_core]:"value"', [1, 2]), + # OR among multiple listed, ok to have unknown + ('metadata[metalad_core,metalad_studyminimeta,unknown]:"value"', [1, 2]), + # Prototypical query for which we do not have full support yet, e.g. + # regex matching :~ + # (r"""((jim AND NOT haxby AND "important\" paper") OR + # ds_id:~"^000[3-9]..$" + # OR url:"example.com") AND metadata:non + # AND metadata[ex1,ex2]:"specific data" + # AND metadata[extractor2]:data""", []), + # Find datasets with the last + # (metadata[bids_dataset][Authors][-1]:haxby ...) + ], + ) + def test_search_cases(self, flask_app, query, expected): + r = parse_query(query) + # print(f"QUERY {query}: {r}") + with flask_app.app_context(): + result = db.session.execute(select(RepoUrl).filter(r)) + hits = [_.id for _ in result.scalars().all()] + # print(expected, hits) + assert hits == expected From 8e85071efe54b8e8f2a2519fb3cb7e98172dcc0c Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 28 Feb 2024 11:08:10 -0800 Subject: [PATCH 3/6] Rename the tests for the search function These names are appropriate now because of the context provided the containing class --- datalad_registry/tests/test_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datalad_registry/tests/test_search.py b/datalad_registry/tests/test_search.py index bda3dda9..97f7e065 100644 --- a/datalad_registry/tests/test_search.py +++ b/datalad_registry/tests/test_search.py @@ -64,7 +64,7 @@ class TestSearch: # r'"BIDSVersion\": \"v"', ], ) - def test_search_errors(self, query, exc, err): + def test_with_invalid_query(self, query, exc, err): with pytest.raises(exc) as ce: parse_query(query) if err: @@ -142,7 +142,7 @@ def test_search_errors(self, query, exc, err): # (metadata[bids_dataset][Authors][-1]:haxby ...) ], ) - def test_search_cases(self, flask_app, query, expected): + def test_with_valid_query(self, flask_app, query, expected): r = parse_query(query) # print(f"QUERY {query}: {r}") with flask_app.app_context(): From 130cb925ae8281256e1e78d560e772361ef5db95 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 28 Feb 2024 11:10:55 -0800 Subject: [PATCH 4/6] Provide doc strings for tests of search functionality --- datalad_registry/tests/test_search.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datalad_registry/tests/test_search.py b/datalad_registry/tests/test_search.py index 97f7e065..3317e2e5 100644 --- a/datalad_registry/tests/test_search.py +++ b/datalad_registry/tests/test_search.py @@ -52,6 +52,10 @@ def populate_with_url_metadata_for_search( class TestSearch: + """ + Tests for the search functionality + """ + @pytest.mark.parametrize( "query, exc, err", [ @@ -65,6 +69,9 @@ class TestSearch: ], ) def test_with_invalid_query(self, query, exc, err): + """ + Test the search functionality in handling invalid queries + """ with pytest.raises(exc) as ce: parse_query(query) if err: @@ -143,6 +150,9 @@ def test_with_invalid_query(self, query, exc, err): ], ) def test_with_valid_query(self, flask_app, query, expected): + """ + Test the search functionality in handling valid queries + """ r = parse_query(query) # print(f"QUERY {query}: {r}") with flask_app.app_context(): From 9b4fb252e4eef76476e273f7bf2f411b79d1efb3 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 28 Feb 2024 12:32:53 -0800 Subject: [PATCH 5/6] Move tests for search out of a class The class is unneeded for all tests in the containing files are for testing the search functionality --- datalad_registry/tests/test_search.py | 206 +++++++++++++------------- 1 file changed, 101 insertions(+), 105 deletions(-) diff --git a/datalad_registry/tests/test_search.py b/datalad_registry/tests/test_search.py index 3317e2e5..ec7f8adf 100644 --- a/datalad_registry/tests/test_search.py +++ b/datalad_registry/tests/test_search.py @@ -51,112 +51,108 @@ def populate_with_url_metadata_for_search( db.session.commit() -class TestSearch: +@pytest.mark.parametrize( + "query, exc, err", + [ + ("unknown_field:example", ValueError, None), + # Lark masks exceptions. We did not provide dedicated ones for all + # of them, but let's test that error message as expected + ("ds_id:=example", VisitError, "Operation = is not implemented"), + # r'(haxby or halchenko) AND ' + # r'metadata:BIDSmetadata[bids_dataset,metalad_core]:' + # r'"BIDSVersion\": \"v"', + ], +) +def test_with_invalid_query(query, exc, err): """ - Tests for the search functionality + Test the search functionality in handling invalid queries """ + with pytest.raises(exc) as ce: + parse_query(query) + if err: + assert err in str(ce.value) - @pytest.mark.parametrize( - "query, exc, err", - [ - ("unknown_field:example", ValueError, None), - # Lark masks exceptions. We did not provide dedicated ones for all - # of them, but let's test that error message as expected - ("ds_id:=example", VisitError, "Operation = is not implemented"), - # r'(haxby or halchenko) AND ' - # r'metadata:BIDSmetadata[bids_dataset,metalad_core]:' - # r'"BIDSVersion\": \"v"', - ], - ) - def test_with_invalid_query(self, query, exc, err): - """ - Test the search functionality in handling invalid queries - """ - with pytest.raises(exc) as ce: - parse_query(query) - if err: - assert err in str(ce.value) - @pytest.mark.usefixtures("populate_with_url_metadata_for_search") - @pytest.mark.parametrize( - "query, expected", - [ - # based purely on url field - ("example", [1]), - ("example OR handbook", [1, 3]), - # case insensitive - ("Example OR handBook", [1, 3]), - ("example AND handbook", []), - ("example handbook", []), # implicit AND - ("datalad OR handbook", [2, 3]), - ("datalad AND handbook", [3]), - ("datalad", [2, 3]), - ("handbook", [3]), - ("NOT url:handbook", [1, 2, 4]), - ("NOT metadata:handbook", [1, 2, 3, 4]), - ("datalad AND NOT url:handbook", [2]), - ("datalad AND (NOT url:handbook)", [2]), - ("datalad (NOT url:handbook)", [2]), # implicit AND - ("datalad AND NOT metadata:handbook", [2, 3]), - # we get empty result ATM which fails the test. TODO - figure it out/fix! - ("NOT handbook", [1, 2, 4]), - ("datalad AND (NOT handbook)", [2]), - ("handbook datalad", [3]), # should be identical result to above AND - ("handbook url:datalad", [3]), - ("handbook url:?datalad", [3]), # identical to above - ("handbook ds_id:datalad", []), - ("handbook OR ds_id:datalad", [3]), - ("url:handbook OR ds_id:datalad", [3]), - ("url:handbook OR ds_id:844c", [1, 2, 3]), - ( - "(url:handbook OR metadata[metalad_core]:meta1value) AND ds_id:844c", - [2, 3], - ), - ( - "(url:?handbook OR metadata[metalad_core]:?meta1value) AND ds_id:?844c", - [2, 3], - ), - ( - "(url:handbook OR metadata[metalad_core]:meta3value) AND ds_id:844C", - [1, 3], - ), - ( - "(url:handbook OR metadata[metalad_core]:value) AND ds_id:844c", - [1, 2, 3], - ), - ( - "(url:handbook OR metadata[metalad_core]:value) ds_id:844c", - [1, 2, 3], - ), # implicit AND - ("meta2value", [2]), - # search within a JSON record. Space between key and value would matter - (r'metadata:"meta1\": \"meta1value\""', [2]), - ('metadata:"value"', [1, 2]), - ('metadata[metalad_studyminimeta]:"value"', []), - # ATM only exact match for extractor - ('metadata[metalad]:"value"', []), - ('metadata[metalad_core]:"value"', [1, 2]), - # OR among multiple listed, ok to have unknown - ('metadata[metalad_core,metalad_studyminimeta,unknown]:"value"', [1, 2]), - # Prototypical query for which we do not have full support yet, e.g. - # regex matching :~ - # (r"""((jim AND NOT haxby AND "important\" paper") OR - # ds_id:~"^000[3-9]..$" - # OR url:"example.com") AND metadata:non - # AND metadata[ex1,ex2]:"specific data" - # AND metadata[extractor2]:data""", []), - # Find datasets with the last - # (metadata[bids_dataset][Authors][-1]:haxby ...) - ], - ) - def test_with_valid_query(self, flask_app, query, expected): - """ - Test the search functionality in handling valid queries - """ - r = parse_query(query) - # print(f"QUERY {query}: {r}") - with flask_app.app_context(): - result = db.session.execute(select(RepoUrl).filter(r)) - hits = [_.id for _ in result.scalars().all()] - # print(expected, hits) - assert hits == expected +@pytest.mark.usefixtures("populate_with_url_metadata_for_search") +@pytest.mark.parametrize( + "query, expected", + [ + # based purely on url field + ("example", [1]), + ("example OR handbook", [1, 3]), + # case insensitive + ("Example OR handBook", [1, 3]), + ("example AND handbook", []), + ("example handbook", []), # implicit AND + ("datalad OR handbook", [2, 3]), + ("datalad AND handbook", [3]), + ("datalad", [2, 3]), + ("handbook", [3]), + ("NOT url:handbook", [1, 2, 4]), + ("NOT metadata:handbook", [1, 2, 3, 4]), + ("datalad AND NOT url:handbook", [2]), + ("datalad AND (NOT url:handbook)", [2]), + ("datalad (NOT url:handbook)", [2]), # implicit AND + ("datalad AND NOT metadata:handbook", [2, 3]), + # we get empty result ATM which fails the test. TODO - figure it out/fix! + ("NOT handbook", [1, 2, 4]), + ("datalad AND (NOT handbook)", [2]), + ("handbook datalad", [3]), # should be identical result to above AND + ("handbook url:datalad", [3]), + ("handbook url:?datalad", [3]), # identical to above + ("handbook ds_id:datalad", []), + ("handbook OR ds_id:datalad", [3]), + ("url:handbook OR ds_id:datalad", [3]), + ("url:handbook OR ds_id:844c", [1, 2, 3]), + ( + "(url:handbook OR metadata[metalad_core]:meta1value) AND ds_id:844c", + [2, 3], + ), + ( + "(url:?handbook OR metadata[metalad_core]:?meta1value) AND ds_id:?844c", + [2, 3], + ), + ( + "(url:handbook OR metadata[metalad_core]:meta3value) AND ds_id:844C", + [1, 3], + ), + ( + "(url:handbook OR metadata[metalad_core]:value) AND ds_id:844c", + [1, 2, 3], + ), + ( + "(url:handbook OR metadata[metalad_core]:value) ds_id:844c", + [1, 2, 3], + ), # implicit AND + ("meta2value", [2]), + # search within a JSON record. Space between key and value would matter + (r'metadata:"meta1\": \"meta1value\""', [2]), + ('metadata:"value"', [1, 2]), + ('metadata[metalad_studyminimeta]:"value"', []), + # ATM only exact match for extractor + ('metadata[metalad]:"value"', []), + ('metadata[metalad_core]:"value"', [1, 2]), + # OR among multiple listed, ok to have unknown + ('metadata[metalad_core,metalad_studyminimeta,unknown]:"value"', [1, 2]), + # Prototypical query for which we do not have full support yet, e.g. + # regex matching :~ + # (r"""((jim AND NOT haxby AND "important\" paper") OR + # ds_id:~"^000[3-9]..$" + # OR url:"example.com") AND metadata:non + # AND metadata[ex1,ex2]:"specific data" + # AND metadata[extractor2]:data""", []), + # Find datasets with the last + # (metadata[bids_dataset][Authors][-1]:haxby ...) + ], +) +def test_with_valid_query(flask_app, query, expected): + """ + Test the search functionality in handling valid queries + """ + r = parse_query(query) + # print(f"QUERY {query}: {r}") + with flask_app.app_context(): + result = db.session.execute(select(RepoUrl).filter(r)) + hits = [_.id for _ in result.scalars().all()] + # print(expected, hits) + assert hits == expected From 1bdd6a3c6a7fba35a2834a410bff87a8e8b9aef0 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 28 Feb 2024 12:44:45 -0800 Subject: [PATCH 6/6] Specify purpose of the `test_search.py` module in comment --- datalad_registry/tests/test_search.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datalad_registry/tests/test_search.py b/datalad_registry/tests/test_search.py index ec7f8adf..8a933ec2 100644 --- a/datalad_registry/tests/test_search.py +++ b/datalad_registry/tests/test_search.py @@ -1,3 +1,6 @@ +# This test module contains fixtures and tests for the search functionality +# enabled by the datalad_registry/search.py module. + from lark.exceptions import VisitError import pytest from sqlalchemy import select