From 2d579d29e7769a7e65294fb706767e6081c31ec2 Mon Sep 17 00:00:00 2001 From: Biyeun Buczyk Date: Thu, 30 Jan 2025 19:34:10 +0100 Subject: [PATCH 1/3] extract case id sort block for case search this will be used in other areas to maintain consistency with case list explorer report sorting --- corehq/apps/case_search/utils.py | 11 +++++++++++ .../apps/reports/standard/cases/case_list_explorer.py | 10 ++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/corehq/apps/case_search/utils.py b/corehq/apps/case_search/utils.py index c2ec5f1076b6..a6bbbb2ad01d 100644 --- a/corehq/apps/case_search/utils.py +++ b/corehq/apps/case_search/utils.py @@ -535,3 +535,14 @@ def _get_case_search_cases(helper, case_ids): # Warning: '_tag_is_related_case' may cause the relevant user-defined properties to be overwritten. def _tag_is_related_case(case): case.case_json[IS_RELATED_CASE] = "true" + + +def get_case_id_sort_block(is_descending=False): + sort_order = 'desc' if is_descending else 'asc' + return [{ + 'case_properties.value.exact': { + 'order': sort_order, + 'nested_path': 'case_properties', + 'nested_filter': {'term': {"case_properties.key.exact": "@case_id"}}, + } + }] diff --git a/corehq/apps/reports/standard/cases/case_list_explorer.py b/corehq/apps/reports/standard/cases/case_list_explorer.py index dee2803f4544..f0f104734d44 100644 --- a/corehq/apps/reports/standard/cases/case_list_explorer.py +++ b/corehq/apps/reports/standard/cases/case_list_explorer.py @@ -10,6 +10,7 @@ DOCS_LINK_CASE_LIST_EXPLORER, ) from corehq.apps.case_search.exceptions import CaseFilterError +from corehq.apps.case_search.utils import get_case_id_sort_block from corehq.apps.es.case_search import CaseSearchES, wrap_case_search_hit from corehq.apps.locations.permissions import location_safe from corehq.apps.reports.datatables import DataTablesColumn, DataTablesHeader @@ -110,14 +111,7 @@ def _populate_sort(self, query, sort): if meta_property.key == '@case_id': # This condition is added because ES 5 does not allow sorting on _id. # When we will have case_id in root of the document, this should be removed. - sort_order = 'desc' if descending else 'asc' - query.es_query['sort'] = [{ - 'case_properties.value.exact': { - 'order': sort_order, - 'nested_path': 'case_properties', - 'nested_filter': {'term': {"case_properties.key.exact": "@case_id"}}, - } - }] + query.es_query['sort'] = get_case_id_sort_block(descending) return query query = query.sort(meta_property.es_field_name, desc=descending) except KeyError: From 467dd017f3b2ef645bad9c04f4eb1ee8278adea7 Mon Sep 17 00:00:00 2001 From: Biyeun Buczyk Date: Thu, 30 Jan 2025 20:08:05 +0100 Subject: [PATCH 2/3] add support for working with elasticsearch queries in django_tables2 this ElasticRecord will support only CaseSearchES for now, as that's what we will need in the near future for data cleaning cases --- .../hqwebapp/tables/elasticsearch/__init__.py | 0 .../hqwebapp/tables/elasticsearch/records.py | 89 ++++++++ .../hqwebapp/tables/elasticsearch/tables.py | 197 ++++++++++++++++++ 3 files changed, 286 insertions(+) create mode 100644 corehq/apps/hqwebapp/tables/elasticsearch/__init__.py create mode 100644 corehq/apps/hqwebapp/tables/elasticsearch/records.py create mode 100644 corehq/apps/hqwebapp/tables/elasticsearch/tables.py diff --git a/corehq/apps/hqwebapp/tables/elasticsearch/__init__.py b/corehq/apps/hqwebapp/tables/elasticsearch/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/corehq/apps/hqwebapp/tables/elasticsearch/records.py b/corehq/apps/hqwebapp/tables/elasticsearch/records.py new file mode 100644 index 000000000000..00eb76f60e68 --- /dev/null +++ b/corehq/apps/hqwebapp/tables/elasticsearch/records.py @@ -0,0 +1,89 @@ +from abc import ABC, abstractmethod + +from django.utils.translation import gettext_lazy + +from corehq.apps.case_search.const import INDEXED_METADATA_BY_KEY +from corehq.apps.case_search.utils import get_case_id_sort_block +from corehq.apps.es.case_search import wrap_case_search_hit +from corehq.apps.reports.standard.cases.data_sources import SafeCaseDisplay +from corehq.util.timezones.utils import get_timezone + + +class BaseElasticRecord(ABC): + + def __init__(self, record, request): + self.record = record + self.request = request + + def __repr__(self): + return f"{self.__class__.__name__}(record={str(self.record)})" + + def __getitem__(self, item): + raise NotImplementedError("please implement __getitem__") + + @property + @abstractmethod + def verbose_name(self): + """ + The full (singular) human-friendly name for the data. + + :return: string + """ + + @property + @abstractmethod + def verbose_name_plural(self): + """ + The full (plural) human-friendly name for the data. + + :return: string + """ + + @staticmethod + def get_sorted_query(query, accessors): + """ + This should return a sorted version of the query passed into it based on + the sorting specified by `accessors`. + + Arguments: + query (ESQuery subclass): + the query we want to apply sorting to + + accessors (list of `OrderBy` instances from django_tables2): + iterate through the list to an `accessor` (`OrderBy`) with the + following relevant properties: + `accessor.bare`: the accessor as defined by the table column + `accessor.is_descending`: (boolean) True if the sort order is descending + :return: an instance of ESQuery (same subclass as the query passed in) + """ + raise NotImplementedError("please implement `get_sorted_query`") + + +class CaseSearchElasticRecord(BaseElasticRecord): + verbose_name = gettext_lazy("case") + verbose_name_plural = gettext_lazy("cases") + + def __init__(self, record, request): + record = SafeCaseDisplay( + wrap_case_search_hit(record), + get_timezone(request, request.domain) + ) + super().__init__(record, request) + + def __getitem__(self, item): + return self.record.get(item) + + @staticmethod + def get_sorted_query(query, accessors): + for accessor in accessors: + try: + meta_property = INDEXED_METADATA_BY_KEY[accessor.bare] + if meta_property.key == '@case_id': + # This condition is added because ES 5 does not allow sorting on _id. + # When we will have case_id in root of the document, this should be removed. + query.es_query['sort'] = get_case_id_sort_block(accessor.is_descending) + return query + query = query.sort(meta_property.es_field_name, desc=accessor.is_descending) + except KeyError: + query = query.sort_by_case_property(accessor.bare, desc=accessor.is_descending) + return query diff --git a/corehq/apps/hqwebapp/tables/elasticsearch/tables.py b/corehq/apps/hqwebapp/tables/elasticsearch/tables.py new file mode 100644 index 000000000000..c85032a585ef --- /dev/null +++ b/corehq/apps/hqwebapp/tables/elasticsearch/tables.py @@ -0,0 +1,197 @@ +from memoized import memoized + +from django_tables2 import tables +from django_tables2.data import TableData +from django_tables2.utils import OrderBy + +from corehq.apps.es import ESQuery +from corehq.util.es.elasticsearch import TransportError +from corehq.apps.es.exceptions import ESError +from corehq.apps.reports.exceptions import BadRequestError + +from corehq.apps.hqwebapp.tables.elasticsearch.records import BaseElasticRecord + + +class ElasticTable(tables.Table): + """ + To use elasticsearch queries with `django_tables2`, please + make sure the Table passed to your TableView inherits from this class. + + You can then return an unsorted and un-paginated ESQuery in your TableView's + `get_queryset()` method. Please make sure that the subclass of `ESQuery` is + compatible with the subclass of `BaseElasticRecord` assigned to + `record_class` below. For instance, `CaseSearchElasticRecord` is compatible + with `CaseSearchES`. + + Below is an example setup for a `django_tables2` paginated TableView with + `ElasticTable`: + + The table: + + class MyCaseTable(ElasticTable): + record_class = CaseSearchElasticRecord + + name = columns.Column( + verbose_name=gettext_lazy("Case Name"), + ) + status = columns.Column( + accessor="@status", + verbose_name=gettext_lazy("Status"), + ) + some_property = columns.Column( + verbose_name=gettext_lazy("Some Property"), + ) + + ~ If you want to use this table with HTMX (recommended) the class will look like: + + class MyCaseTable(BaseHtmxTable, ElasticTable): + record_class = CaseSearchElasticRecord + + class Meta(BaseHtmxTable.Meta): + pass + + ... # rest of the column configuration + + + The TableView: + + class MyCaseTableView(LoginAndDomainMixin, DomainViewMixin, SelectablePaginatedTableView): + urlname = "..." + table_class = MyCaseTable + + def get_queryset(self): + return CaseSearchES().domain(self.domain) + + If using HTMX, you can follow the HTMX table example in the styleguide for the host view. + """ + record_class = BaseElasticRecord + + def __init__(self, **kwargs): + data = kwargs.pop('data') + if not ElasticTableData.validate(data): + raise ValueError( + "Please ensure that `data` is a subclass of ESQuery. " + "Otherwise, do not inherit from ElasticTable." + ) + data = ElasticTableData(data) + super().__init__(data=data, **kwargs) + + +class ElasticTableData(TableData): + """ + This is a `django_tables2` `TableData` container for processing an + elasticsearch query with the `ElasticTable` class above. + + You will likely not instantiate this class directly elsewhere. Please see + the `ElasticTable` class for how to use elasticsearch queries with + `django_tables2`. + """ + + def __init__(self, query): + self.query = query + self._length = None + super().__init__([]) # init sets self.data to this value + + def __getitem__(self, key): + if isinstance(key, slice): + return self._get_records(key.start, key.stop) + return self._get_records(key, key + 1)[0] + + def _get_records(self, start, stop): + if stop < start: + raise ValueError("'stop' must be greater than 'start'") + size = stop - start + page_query = self.query.start(start).size(size) + results = self._get_es_results(page_query) + self.data = [self.table.record_class(record, self.table.request) + for record in results['hits'].get('hits', [])] + return self.data + + @staticmethod + def validate(data): + """ + Validates `data` (from `ElasticTable`) for use in this container. + """ + return isinstance(data, ESQuery) + + def set_table(self, table): + """ + This is called from the __init__ method of `django_tables2`'s `Table` class. + """ + if not isinstance(table, ElasticTable): + raise ValueError("The table should be an instance of ElasticTable.") + if not issubclass(table.record_class, BaseElasticRecord): + raise ValueError("table.record_class should be a subclass of BaseElasticRecord.") + super().set_table(table) + + def __len__(self): + if self._length is None: + self._length = self._total_records + return self._length + + @property + @memoized + def _total_records(self): + res = self._get_es_results(self.query.size(0)) + if res is not None: + return res['hits'].get('total', 0) + else: + return 0 + + @staticmethod + def _get_es_results(query): + try: + return query.run().raw + except ESError as e: + original_exception = e.args[0] + if isinstance(original_exception, TransportError): + if hasattr(original_exception.info, "get"): + if original_exception.info.get('status') == 400: + raise BadRequestError() + raise e + + def order_by(self, aliases): + """ + Order the data based on OrderBy aliases (prefixed column names) in the + table. We first convert the aliases to the column's "accessors" (where appropriate) + and then pass the list of OrderBy(accessor) values to the ElasticTable's `record_class` + to sort the query. + + We let the ElasticTable's `record_class` take care of sorting the query because + each elasticsearch index will have a different approach to sorting, + depending on whether we are dealing with ESCase, FormES, etc. + + Arguments: + aliases (an `OrderByTuple` instance from `django_tables2`): + optionally prefixed names of columns ('-' indicates descending order) + in order of significance with regard to data ordering. + """ + accessors = [] + for alias in aliases: + bound_column = self.table.columns[OrderBy(alias).bare] + + # bound_column.order_by reflects the current ordering applied to + # the table. As such we need to check the current ordering on the + # column and use the opposite if it doesn't match the alias prefix. + if alias[0] != bound_column.order_by_alias[0]: + accessors += bound_column.order_by.opposite + else: + accessors += bound_column.order_by + + self.query = self.table.record_class.get_sorted_query(self.query, accessors) + + @property + @memoized + def verbose_name(self): + """ + The full (singular) name for the data. + """ + return self.table.record_class.verbose_name + + @property + @memoized + def verbose_name_plural(self): + """ + The full (plural) name for the data. + """ + return self.table.record_class.verbose_name_plural From 776fcf70fd86b2cc35407a116fd704638a4c4a7e Mon Sep 17 00:00:00 2001 From: Biyeun Buczyk Date: Thu, 30 Jan 2025 20:10:07 +0100 Subject: [PATCH 3/3] add tests for ElasticTable and CaseSearchElasticRecord --- corehq/apps/hqwebapp/tests/tables/__init__.py | 0 .../apps/hqwebapp/tests/tables/generator.py | 39 ++++++ .../tests/tables/test_elastic_records.py | 128 ++++++++++++++++++ .../tests/tables/test_elastic_tables.py | 114 ++++++++++++++++ 4 files changed, 281 insertions(+) create mode 100644 corehq/apps/hqwebapp/tests/tables/__init__.py create mode 100644 corehq/apps/hqwebapp/tests/tables/generator.py create mode 100644 corehq/apps/hqwebapp/tests/tables/test_elastic_records.py create mode 100644 corehq/apps/hqwebapp/tests/tables/test_elastic_tables.py diff --git a/corehq/apps/hqwebapp/tests/tables/__init__.py b/corehq/apps/hqwebapp/tests/tables/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/corehq/apps/hqwebapp/tests/tables/generator.py b/corehq/apps/hqwebapp/tests/tables/generator.py new file mode 100644 index 000000000000..19aa02c7a356 --- /dev/null +++ b/corehq/apps/hqwebapp/tests/tables/generator.py @@ -0,0 +1,39 @@ +import datetime +import uuid + +from casexml.apps.case.mock import CaseBlock + +from corehq.util.test_utils import unit_testing_only + + +@unit_testing_only +def get_case_blocks(): + case_blocks = [] + + date_opened = datetime.datetime(2022, 2, 17, 12) + for name, properties in [ + ("Luna Zenzi", {"color": "red", "domain": "user input"}), + ("Salman Srinivas", {"color": "purple", "domain": "user input"}), + ("Stella Jonathon", {"color": "red", "domain": "user input"}), + ("Arundhati Eddy", {"color": "green", "domain": "user input"}), + ("Katherine Rebecca", {"color": "orange", "domain": "user input"}), + ("Trish Hartmann", {"color": "blue", "domain": "user input"}), + ("Karan Jonathon", {"color": "purple", "domain": "user input"}), + ("Olivia Elicia", {"color": "yellow", "domain": "user input"}), + ("Stella Coba", {"color": "periwinkle", "domain": "user input"}), + ("Santiago Edwena", {"color": "salmon", "domain": "user input"}), + ("Olivia Joeri", {"color": "purple", "domain": "user input"}), + ]: + case_blocks.append(CaseBlock( + case_id=str(uuid.uuid4()), + case_type='person', + case_name=name, + owner_id='person_owner', + date_opened=date_opened, + create=True, + update=properties, + )) + date_opened += datetime.timedelta(days=1) + + case_blocks[-1].close = True # close Olivia Joeri + return case_blocks diff --git a/corehq/apps/hqwebapp/tests/tables/test_elastic_records.py b/corehq/apps/hqwebapp/tests/tables/test_elastic_records.py new file mode 100644 index 000000000000..5c63db5d4c77 --- /dev/null +++ b/corehq/apps/hqwebapp/tests/tables/test_elastic_records.py @@ -0,0 +1,128 @@ +from django.test import TestCase, RequestFactory +from django_tables2.utils import OrderBy + +from corehq.apps.case_search.utils import get_case_id_sort_block +from corehq.apps.data_dictionary.models import CaseType +from corehq.apps.es.tests.utils import ( + case_search_es_setup, + es_test, +) +from corehq.apps.es import CaseSearchES +from corehq.apps.es.case_search import case_search_adapter +from corehq.apps.hqwebapp.tables.elasticsearch.records import CaseSearchElasticRecord +from corehq.apps.hqwebapp.tests.tables.generator import get_case_blocks +from corehq.form_processor.tests.utils import FormProcessorTestUtils + + +@es_test(requires=[case_search_adapter], setup_class=True) +class CaseSearchElasticRecordTests(TestCase): + domain = 'test-cs-table-data' + + @classmethod + def setUpClass(cls): + super().setUpClass() + case_search_es_setup(cls.domain, get_case_blocks()) + cls.case_type_obj = CaseType(domain=cls.domain, name='person') + cls.case_type_obj.save() + + cls.request = RequestFactory().get('/cases/') + cls.request.domain = None # so that get_timezone doesn't do a complex lookup + + @classmethod + def tearDownClass(cls): + FormProcessorTestUtils.delete_all_cases() + cls.case_type_obj.delete() + super().tearDownClass() + + def test_sorting_by_name(self): + query = CaseSearchES().domain(self.domain) + accessors = [OrderBy('-name')] + query = CaseSearchElasticRecord.get_sorted_query(query, accessors) + expected = [{ + 'name.exact': {'order': 'desc'}, + }] + self.assertEqual(query.es_query['sort'], expected) + + def test_sorting_by_case_id(self): + query = CaseSearchES().domain(self.domain) + accessors = [OrderBy('@case_id')] + query = CaseSearchElasticRecord.get_sorted_query(query, accessors) + expected = get_case_id_sort_block(is_descending=False) + self.assertEqual(query.es_query['sort'], expected) + + def test_sorting_by_case_property(self): + query = CaseSearchES().domain(self.domain) + aliases = [OrderBy('-description')] + query = CaseSearchElasticRecord.get_sorted_query(query, aliases) + expected = [ + { + 'case_properties.value.numeric': { + 'order': 'desc', + 'nested_path': 'case_properties', + 'nested_filter': { + 'term': { + 'case_properties.key.exact': 'description' + } + }, + 'missing': None + } + }, + { + 'case_properties.value.date': { + 'order': 'desc', + 'nested_path': 'case_properties', + 'nested_filter': { + 'term': { + 'case_properties.key.exact': 'description' + } + }, + 'missing': None + } + }, + { + 'case_properties.value.exact': { + 'order': 'desc', + 'nested_path': 'case_properties', + 'nested_filter': { + 'term': { + 'case_properties.key.exact': 'description' + } + }, + 'missing': None + } + } + ] + self.assertEqual(query.es_query['sort'], expected) + + @staticmethod + def _get_hit_from_query(query, index): + return query.start(index).size(1).run().raw['hits']['hits'][0] + + def test_get_reserved_property(self): + """ + A case has `domain` as an actual property or a case property, if supplied. + This "case property" is always user-inputted and is unrelated to the domain + the case belongs to. We want to make sure the "case property" is the value returned. + """ + query = CaseSearchES().domain(self.domain) + case_hit = self._get_hit_from_query(query, 2) + record = CaseSearchElasticRecord(case_hit, self.request) + self.assertEqual(record['domain'], 'user input') + + def test_get_non_existing_property(self): + query = CaseSearchES().domain(self.domain) + case_hit = self._get_hit_from_query(query, 4) + record = CaseSearchElasticRecord(case_hit, self.request) + self.assertEqual(record['does_not_exist'], None) + + def test_get_property(self): + query = CaseSearchES().domain(self.domain) + case_hit = self._get_hit_from_query(query, 0) + record = CaseSearchElasticRecord(case_hit, self.request) + self.assertEqual(record['color'], 'red') + + def test_get_name(self): + query = CaseSearchES().domain(self.domain) + case_hit = self._get_hit_from_query(query, 5) + record = CaseSearchElasticRecord(case_hit, self.request) + self.assertEqual(record['name'], 'Trish Hartmann') diff --git a/corehq/apps/hqwebapp/tests/tables/test_elastic_tables.py b/corehq/apps/hqwebapp/tests/tables/test_elastic_tables.py new file mode 100644 index 000000000000..7c3aa6bf9b8d --- /dev/null +++ b/corehq/apps/hqwebapp/tests/tables/test_elastic_tables.py @@ -0,0 +1,114 @@ +from django.test import TestCase, RequestFactory +from django_tables2 import columns +from django_tables2.utils import OrderByTuple + +from corehq.apps.data_dictionary.models import CaseType +from corehq.apps.es.tests.utils import ( + case_search_es_setup, + es_test, +) +from corehq.apps.es import CaseSearchES +from corehq.apps.es.case_search import case_search_adapter +from corehq.apps.hqwebapp.tables.elasticsearch.records import CaseSearchElasticRecord +from corehq.apps.hqwebapp.tables.elasticsearch.tables import ElasticTable +from corehq.apps.hqwebapp.tests.tables.generator import get_case_blocks +from corehq.form_processor.tests.utils import FormProcessorTestUtils + + +@es_test(requires=[case_search_adapter], setup_class=True) +class ElasticTableTests(TestCase): + domain = 'test-cs-table-data' + + @classmethod + def setUpClass(cls): + super().setUpClass() + case_search_es_setup(cls.domain, get_case_blocks()) + cls.case_type_obj = CaseType(domain=cls.domain, name='person') + cls.case_type_obj.save() + + cls.request = RequestFactory().get('/cases/') + cls.request.domain = None # so that get_timezone doesn't do a complex lookup + + class TestCaseSearchElasticTable(ElasticTable): + record_class = CaseSearchElasticRecord + + name = columns.Column( + verbose_name="Case Name", + ) + case_type = columns.Column( + accessor="@case_type", + verbose_name="Case Type", + ) + status = columns.Column( + accessor="@status", + verbose_name="Status", + ) + opened_on = columns.Column( + verbose_name="Opened On", + ) + + cls.table_class = TestCaseSearchElasticTable + + @classmethod + def tearDownClass(cls): + FormProcessorTestUtils.delete_all_cases() + cls.case_type_obj.delete() + super().tearDownClass() + + def test_order_by_alias_to_accessor(self): + """ + We want to make sure that the alias for `status` is converted to + its accessor `@status` to generate the correct sorting block in the + CaseSearchES query when `order_by` is called on the `ElasticTableData` instance. + """ + query = CaseSearchES().domain(self.domain) + table = self.table_class(data=query, request=self.request) + aliases = OrderByTuple(['-status']) + table.data.order_by(aliases) + expected = [{'closed': {'order': 'desc'}}] + self.assertEqual(table.data.query.es_query['sort'], expected) + record = table.data[0] + self.assertEqual(record['@status'], 'closed') + self.assertEqual(record['name'], "Olivia Joeri") + + def test_slicing_and_data(self): + """ + We want to ensure that slicing ElasticTableData returns a list of + instances of that table's `record_class`. + """ + query = CaseSearchES().domain(self.domain) + table = self.table_class(data=query, request=self.request) + results = table.data[5:10] + self.assertEqual(len(results), 5) + self.assertEqual(type(results), list) + self.assertEqual(type(results[0]), self.table_class.record_class) + + def test_get_single_record_returns_object(self): + """ + We want to ensure that getting a single "key" from ElasticTableData + (as opposed to a "slice") returns a single instance that matches + of the related table's `record_class`. + """ + query = CaseSearchES().domain(self.domain) + table = self.table_class(data=query, request=self.request) + record = table.data[0] + self.assertEqual(type(record), self.table_class.record_class) + + def test_get_out_of_bounds_record_raises_index_error(self): + """ + We want to ensure that getting a "key" from ElasticTableData that + is out-of-bounds raises an `IndexError` + """ + query = CaseSearchES().domain(self.domain) + table = self.table_class(data=query, request=self.request) + with self.assertRaises(IndexError): + table.data[50] + + def test_data_length(self): + """ + We want to ensure that getting calling `len` of `ElasticTableData` + returns the expected total number of records in the query. + """ + query = CaseSearchES().domain(self.domain) + table = self.table_class(data=query, request=self.request) + self.assertEqual(len(table.data), 11)