Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for paginating elasticsearch queries for django_tables2 (with HTMX) #35712

Merged
merged 3 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions corehq/apps/case_search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,3 +535,14 @@ def _get_case_search_cases(helper, case_ids):
# Warning: '_tag_is_related_case' may cause the relevant user-defined properties to be overwritten.
def _tag_is_related_case(case):
case.case_json[IS_RELATED_CASE] = "true"


def get_case_id_sort_block(is_descending=False):
sort_order = 'desc' if is_descending else 'asc'
return [{
'case_properties.value.exact': {
'order': sort_order,
'nested_path': 'case_properties',
'nested_filter': {'term': {"case_properties.key.exact": "@case_id"}},
}
}]
Empty file.
89 changes: 89 additions & 0 deletions corehq/apps/hqwebapp/tables/elasticsearch/records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from abc import ABC, abstractmethod

from django.utils.translation import gettext_lazy

from corehq.apps.case_search.const import INDEXED_METADATA_BY_KEY
from corehq.apps.case_search.utils import get_case_id_sort_block
from corehq.apps.es.case_search import wrap_case_search_hit
from corehq.apps.reports.standard.cases.data_sources import SafeCaseDisplay
from corehq.util.timezones.utils import get_timezone


class BaseElasticRecord(ABC):

def __init__(self, record, request):
self.record = record
self.request = request

def __repr__(self):
return f"{self.__class__.__name__}(record={str(self.record)})"

def __getitem__(self, item):
raise NotImplementedError("please implement __getitem__")

@property
@abstractmethod
def verbose_name(self):
"""
The full (singular) human-friendly name for the data.

:return: string
"""

@property
@abstractmethod
def verbose_name_plural(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps the default implementation could just add a 's' to the verbose_name.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm following requirements of the base models from django_tables2, and that's not always the case with translated text that plurals end in s, in english or other languages

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Emphasis on default. But ok.

"""
The full (plural) human-friendly name for the data.

:return: string
"""

@staticmethod
def get_sorted_query(query, accessors):
"""
This should return a sorted version of the query passed into it based on
the sorting specified by `accessors`.

Arguments:
query (ESQuery subclass):
the query we want to apply sorting to

accessors (list of `OrderBy` instances from django_tables2):
iterate through the list to an `accessor` (`OrderBy`) with the
following relevant properties:
`accessor.bare`: the accessor as defined by the table column
`accessor.is_descending`: (boolean) True if the sort order is descending
:return: an instance of ESQuery (same subclass as the query passed in)
"""
raise NotImplementedError("please implement `get_sorted_query`")


class CaseSearchElasticRecord(BaseElasticRecord):
verbose_name = gettext_lazy("case")
verbose_name_plural = gettext_lazy("cases")

def __init__(self, record, request):
record = SafeCaseDisplay(
wrap_case_search_hit(record),
get_timezone(request, request.domain)
)
super().__init__(record, request)

def __getitem__(self, item):
return self.record.get(item)

@staticmethod
def get_sorted_query(query, accessors):
for accessor in accessors:
try:
meta_property = INDEXED_METADATA_BY_KEY[accessor.bare]
if meta_property.key == '@case_id':
# This condition is added because ES 5 does not allow sorting on _id.
# When we will have case_id in root of the document, this should be removed.
query.es_query['sort'] = get_case_id_sort_block(accessor.is_descending)
return query
query = query.sort(meta_property.es_field_name, desc=accessor.is_descending)
except KeyError:
query = query.sort_by_case_property(accessor.bare, desc=accessor.is_descending)
return query
197 changes: 197 additions & 0 deletions corehq/apps/hqwebapp/tables/elasticsearch/tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
from memoized import memoized

from django_tables2 import tables
from django_tables2.data import TableData
from django_tables2.utils import OrderBy

from corehq.apps.es import ESQuery
from corehq.util.es.elasticsearch import TransportError
from corehq.apps.es.exceptions import ESError
from corehq.apps.reports.exceptions import BadRequestError

from corehq.apps.hqwebapp.tables.elasticsearch.records import BaseElasticRecord


class ElasticTable(tables.Table):
"""
To use elasticsearch queries with `django_tables2`, please
make sure the Table passed to your TableView inherits from this class.

You can then return an unsorted and un-paginated ESQuery in your TableView's
`get_queryset()` method. Please make sure that the subclass of `ESQuery` is
compatible with the subclass of `BaseElasticRecord` assigned to
`record_class` below. For instance, `CaseSearchElasticRecord` is compatible
with `CaseSearchES`.

Below is an example setup for a `django_tables2` paginated TableView with
`ElasticTable`:

The table:

class MyCaseTable(ElasticTable):
record_class = CaseSearchElasticRecord

name = columns.Column(
verbose_name=gettext_lazy("Case Name"),
)
status = columns.Column(
accessor="@status",
verbose_name=gettext_lazy("Status"),
)
some_property = columns.Column(
verbose_name=gettext_lazy("Some Property"),
)

~ If you want to use this table with HTMX (recommended) the class will look like:

class MyCaseTable(BaseHtmxTable, ElasticTable):
record_class = CaseSearchElasticRecord

class Meta(BaseHtmxTable.Meta):
pass

... # rest of the column configuration


The TableView:

class MyCaseTableView(LoginAndDomainMixin, DomainViewMixin, SelectablePaginatedTableView):
urlname = "..."
table_class = MyCaseTable

def get_queryset(self):
return CaseSearchES().domain(self.domain)

If using HTMX, you can follow the HTMX table example in the styleguide for the host view.
"""
record_class = BaseElasticRecord

def __init__(self, **kwargs):
data = kwargs.pop('data')
if not ElasticTableData.validate(data):
raise ValueError(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

"Please ensure that `data` is a subclass of ESQuery. "
"Otherwise, do not inherit from ElasticTable."
)
data = ElasticTableData(data)
super().__init__(data=data, **kwargs)


class ElasticTableData(TableData):
"""
This is a `django_tables2` `TableData` container for processing an
elasticsearch query with the `ElasticTable` class above.

You will likely not instantiate this class directly elsewhere. Please see
the `ElasticTable` class for how to use elasticsearch queries with
`django_tables2`.
"""

def __init__(self, query):
self.query = query
self._length = None
super().__init__([]) # init sets self.data to this value

def __getitem__(self, key):
if isinstance(key, slice):
return self._get_records(key.start, key.stop)
return self._get_records(key, key + 1)[0]

def _get_records(self, start, stop):
if stop < start:
raise ValueError("'stop' must be greater than 'start'")
size = stop - start
page_query = self.query.start(start).size(size)
results = self._get_es_results(page_query)
self.data = [self.table.record_class(record, self.table.request)
for record in results['hits'].get('hits', [])]
return self.data

@staticmethod
def validate(data):
"""
Validates `data` (from `ElasticTable`) for use in this container.
"""
return isinstance(data, ESQuery)

def set_table(self, table):
"""
This is called from the __init__ method of `django_tables2`'s `Table` class.
"""
if not isinstance(table, ElasticTable):
raise ValueError("The table should be an instance of ElasticTable.")
if not issubclass(table.record_class, BaseElasticRecord):
raise ValueError("table.record_class should be a subclass of BaseElasticRecord.")
super().set_table(table)

def __len__(self):
if self._length is None:
self._length = self._total_records
return self._length

@property
@memoized
def _total_records(self):
res = self._get_es_results(self.query.size(0))
if res is not None:
return res['hits'].get('total', 0)
else:
return 0

@staticmethod
def _get_es_results(query):
try:
return query.run().raw
except ESError as e:
original_exception = e.args[0]
if isinstance(original_exception, TransportError):
if hasattr(original_exception.info, "get"):
if original_exception.info.get('status') == 400:
raise BadRequestError()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does TransportError have any other data in it about what went wrong? I don't like the idea of erasing that and potentially only logging a BadRequestError, which does not give you much to work with when investigating an issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm following the pattern we use with CaseListExplorer for fetching results

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That does not mean we cannot improve on it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's table this

raise e

def order_by(self, aliases):
"""
Order the data based on OrderBy aliases (prefixed column names) in the
table. We first convert the aliases to the column's "accessors" (where appropriate)
and then pass the list of OrderBy(accessor) values to the ElasticTable's `record_class`
to sort the query.

We let the ElasticTable's `record_class` take care of sorting the query because
each elasticsearch index will have a different approach to sorting,
depending on whether we are dealing with ESCase, FormES, etc.

Arguments:
aliases (an `OrderByTuple` instance from `django_tables2`):
optionally prefixed names of columns ('-' indicates descending order)
in order of significance with regard to data ordering.
"""
accessors = []
for alias in aliases:
bound_column = self.table.columns[OrderBy(alias).bare]

# bound_column.order_by reflects the current ordering applied to
# the table. As such we need to check the current ordering on the
# column and use the opposite if it doesn't match the alias prefix.
if alias[0] != bound_column.order_by_alias[0]:
accessors += bound_column.order_by.opposite
else:
accessors += bound_column.order_by

self.query = self.table.record_class.get_sorted_query(self.query, accessors)

@property
@memoized
def verbose_name(self):
"""
The full (singular) name for the data.
"""
return self.table.record_class.verbose_name

@property
@memoized
def verbose_name_plural(self):
"""
The full (plural) name for the data.
"""
return self.table.record_class.verbose_name_plural
Empty file.
39 changes: 39 additions & 0 deletions corehq/apps/hqwebapp/tests/tables/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import datetime
import uuid

from casexml.apps.case.mock import CaseBlock

from corehq.util.test_utils import unit_testing_only


@unit_testing_only
def get_case_blocks():
case_blocks = []

date_opened = datetime.datetime(2022, 2, 17, 12)
for name, properties in [
("Luna Zenzi", {"color": "red", "domain": "user input"}),
("Salman Srinivas", {"color": "purple", "domain": "user input"}),
("Stella Jonathon", {"color": "red", "domain": "user input"}),
("Arundhati Eddy", {"color": "green", "domain": "user input"}),
("Katherine Rebecca", {"color": "orange", "domain": "user input"}),
("Trish Hartmann", {"color": "blue", "domain": "user input"}),
("Karan Jonathon", {"color": "purple", "domain": "user input"}),
("Olivia Elicia", {"color": "yellow", "domain": "user input"}),
("Stella Coba", {"color": "periwinkle", "domain": "user input"}),
("Santiago Edwena", {"color": "salmon", "domain": "user input"}),
("Olivia Joeri", {"color": "purple", "domain": "user input"}),
]:
case_blocks.append(CaseBlock(
case_id=str(uuid.uuid4()),
case_type='person',
case_name=name,
owner_id='person_owner',
date_opened=date_opened,
create=True,
update=properties,
))
date_opened += datetime.timedelta(days=1)

case_blocks[-1].close = True # close Olivia Joeri
return case_blocks
Loading
Loading