Skip to content

Commit

Permalink
Merge pull request #35 from keitaroinc/solr-multilanguage
Browse files Browse the repository at this point in the history
Solr multilanguage
  • Loading branch information
blagojabozinovski authored Oct 31, 2024
2 parents 06cf635 + d3871fb commit 9ef94e1
Showing 1 changed file with 77 additions and 4 deletions.
81 changes: 77 additions & 4 deletions ckanext/alisea/plugin.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import ckan.plugins as plugins
import ckan.plugins.toolkit as toolkit
import json
from ckan.lib.plugins import DefaultTranslation
from collections import OrderedDict
from ckanext.alisea import helpers as h
import json
import ast
import logging

log = logging.getLogger(__name__)


class AliseaPlugin(plugins.SingletonPlugin, DefaultTranslation):
Expand Down Expand Up @@ -80,8 +84,77 @@ def organization_facets(self, facet_dict, organization_type, package_type):
return OrderedDict(new_facets)

# IPackageController
def before_dataset_index(self, data_dict):
data_dict['agroecology_category'] = json.loads(data_dict.get('agroecology_category', '[]'))
data_dict['agroecology_keyword'] = json.loads(data_dict.get('agroecology_keyword', '[]'))
def _before_index_dump_dicts(self, data_dict):
"""
Converts dict fields in the data dictionary to JSON strings.
This function is necessary to ensure that all fields in the data dictionary
can be indexed by Solr. Solr cannot directly index fields of type dict,
which can lead to errors such as "missing required field" even when the
field is present in the data dictionary. By converting dict fields to JSON
strings, we ensure that the data is in a format that Solr can handle.
This issue (https://github.com/ckan/ckan/issues/8423) has been observed in CKAN versions 2.10.4 and Solr 9, where
attempts to upload resources to the Datastore resulted in errors due to
the presence of dict fields in the data dictionary. The solution involves
transforming these fields into strings before indexing, as discussed in
the following issues:
- CKAN - Custom plugin/theme error datastore using fluent presets https://github.com/ckan/ckan/issues/7750
- Solr error: missing required field https://github.com/ckan/ckan/issues/7730
Args:
data_dict (dict): The data dictionary to be processed.
Returns:
dict: The processed data dictionary with dict fields as JSON strings.
"""
for key, value in data_dict.items():
if isinstance(value, dict):
data_dict[key] = json.dumps(value)
return data_dict

def convert_stringified_lists(self, data_dict):
"""
Converts stringified lists in the data dictionary to actual lists.
Args:
data_dict (dict): The data dictionary to be processed.
Returns:
dict: The processed data dictionary with actual lists.
This function iterates over the items in the data dictionary and converts
any stringified lists (strings that start with '[' and end with ']') into
actual lists. Keys that start with 'extras_', 'res_', or are 'validated_data_dict'
are excluded from this conversion.
"""
# Excluded items
excluded_keys = [
key for key in data_dict
if key.startswith('extras_') or key.startswith('res_') or key == 'validated_data_dict'
]

# Filter data dictionary
filter_data_dict = {
key: value for key, value in data_dict.items()
if key not in excluded_keys
}

for key, value in filter_data_dict.items():
if isinstance(value, str) and value.startswith('[') and value.endswith(']'):
try:
data_dict[key] = ast.literal_eval(value)
except (ValueError, SyntaxError) as e:
log.error("Error converting stringified list for key '%s': %s", key, e)

return data_dict


def before_dataset_index(self, data_dict):

data_dict = self._before_index_dump_dicts(data_dict)
data_dict = self.convert_stringified_lists(data_dict)
data_dict['agroecology_category'] = json.loads(json.dumps(data_dict.get('agroecology_category', '[]')))
data_dict['agroecology_keyword'] = json.loads(json.dumps(data_dict.get('agroecology_keyword', '[]')))

return data_dict

0 comments on commit 9ef94e1

Please sign in to comment.