Skip to content

Commit

Permalink
Fix solr indexing for multilanguage
Browse files Browse the repository at this point in the history
  • Loading branch information
blagojabozinovski committed Oct 30, 2024
1 parent 06cf635 commit 330022d
Showing 1 changed file with 33 additions and 3 deletions.
36 changes: 33 additions & 3 deletions ckanext/alisea/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,38 @@ def organization_facets(self, facet_dict, organization_type, package_type):
return OrderedDict(new_facets)

# IPackageController
def _before_index_dump_dicts(self, data_dict):
"""
Converts dict fields in the data dictionary to JSON strings.
This function is necessary to ensure that all fields in the data dictionary
can be indexed by Solr. Solr cannot directly index fields of type dict,
which can lead to errors such as "missing required field" even when the
field is present in the data dictionary. By converting dict fields to JSON
strings, we ensure that the data is in a format that Solr can handle.
This issue (https://github.com/ckan/ckan/issues/8423) has been observed in CKAN versions 2.10.4 and Solr 9, where
attempts to upload resources to the Datastore resulted in errors due to
the presence of dict fields in the data dictionary. The solution involves
transforming these fields into strings before indexing, as discussed in
the following issues:
- CKAN - Custom plugin/theme error datastore using fluent presets https://github.com/ckan/ckan/issues/7750
- Solr error: missing required field https://github.com/ckan/ckan/issues/7730
Args:
data_dict (dict): The data dictionary to be processed.
Returns:
dict: The processed data dictionary with dict fields as JSON strings.
"""
for key, value in data_dict.items():
if isinstance(value, dict):
data_dict[key] = json.dumps(value)
return data_dict


def before_dataset_index(self, data_dict):
    """
    Prepare a dataset dictionary for Solr indexing.

    The ``agroecology_category`` and ``agroecology_keyword`` fields are
    normalized so they are indexed as decoded values rather than as raw
    JSON text:

      - a missing field, ``None`` or a blank string becomes an empty list;
      - a non-blank string is decoded with ``json.loads``;
      - any other value (e.g. an already-decoded list) is kept as is.

    Afterwards every remaining dict-valued top-level field is converted to
    a JSON string via ``_before_index_dump_dicts``.

    Args:
        data_dict (dict): The dataset dictionary about to be indexed. It
            is modified in place.

    Returns:
        dict: The processed data dictionary.

    Raises:
        ValueError: If one of the two fields holds a non-blank string that
            is not valid JSON (``json.JSONDecodeError`` is a subclass).
    """
    for field_name in ('agroecology_category', 'agroecology_keyword'):
        value = data_dict.get(field_name)
        if value is None:
            value = []
        elif isinstance(value, str):
            # Decode only strings: json.loads() on an already-decoded list
            # raises TypeError, and a dumps()/loads() round trip would
            # leave a JSON-encoded string undecoded.
            value = json.loads(value) if value.strip() else []
        data_dict[field_name] = value
    data_dict = self._before_index_dump_dicts(data_dict)
    return data_dict

0 comments on commit 330022d

Please sign in to comment.