Skip to content

Commit

Permalink
HDX-10466 data completeness
Browse files (browse the repository at this point in the history)
  • Loading branch information
danmihaila committed Mar 5, 2025
1 parent 74578ec commit 8c42b73
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 16 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,11 @@ class DataCompleteness(object):
'rows': 500,
'fl': ['id', 'name', 'title', 'organization',
'extras_data_update_frequency',
'last_modified', 'review_date'],
'last_modified',
'review_date',
'dataset_date',
'extras_dataset_date',
],
'ext_compute_freshness': 'for-data-completeness'
}

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,7 +67,7 @@ def _generate_dataset_dict(dataset_name, org_id, group_name, review_date, user=U
dataset = {
'package_creator': 'test function',
'private': False,
'dataset_date': '[1960-01-01 TO 2012-12-31]',
'dataset_date': '[1960-01-01 TO {}]'.format(review_date.strftime('%Y-%m-%d')),
'caveats': 'These are the caveats',
'license_other': 'TEST OTHER LICENSE',
'methodology': 'This is a test methodology',
Expand All @@ -78,7 +78,7 @@ def _generate_dataset_dict(dataset_name, org_id, group_name, review_date, user=U
'title': 'Test Dataset ' + dataset_name,
'owner_org': org_id,
'groups': [{'name': group_name}],
'review_date': review_date.isoformat(),
# 'review_date': review_date.isoformat(),
'data_update_frequency': '30',
'maintainer': user
}
Expand All @@ -105,12 +105,16 @@ def setup_data():
org_url='https://hdx.hdxtest.org/'
)

context = {'model': model, 'session': model.Session, 'user': SYSADMIN}

review_date1 = datetime.datetime.utcnow() - datetime.timedelta(days=60)
_generate_dataset_dict('dataset1-category1', ORG, group.get('name'), review_date1)
pkg_dict_1 = _get_action('package_show')(context, {'id': 'dataset1-category1'})

review_date2 = datetime.datetime.utcnow()
_generate_dataset_dict('dataset2-category1', ORG, group.get('name'), review_date2)

pkg_dict_2 = _get_action('package_show')(context, {'id': 'dataset2-category1'})
assert True

@pytest.fixture(scope='module')
def keep_db_tables_on_clean():
Expand Down
3 changes: 2 additions & 1 deletion ckanext-hdx_package/ckanext/hdx_package/helpers/extras.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,8 @@


ALLOWED_EXTRAS = {
'review_date': None,
'dataset_date': None,
# 'review_date': None,
'data_update_frequency': None,
'is_requestdata_type': [tk.get_validator('boolean_validator')],
}
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -128,7 +128,7 @@ class FreshnessCalculator(object):
# return last_change_date

@staticmethod
def end_of_dataset_date(dataset_dict):
def end_of_dataset_date(dataset_date):
"""
Extracts the end date from dataset_date and returns a timezone-aware datetime object.
Expand All @@ -138,7 +138,7 @@ def end_of_dataset_date(dataset_dict):
:rtype: datetime.datetime
"""

dataset_date = dataset_dict.get('dataset_date', '')
# dataset_date = dataset_dict.get('dataset_date', '')
if dataset_date:
dataset_end_date = dataset_date.split(' TO ')[-1].strip('[]')
if dataset_end_date == '*':
Expand All @@ -153,17 +153,10 @@ def __init__(self, dataset_dict):
self.surely_not_fresh = True
self.dataset_dict = dataset_dict
update_freq = get_extra_from_dataset('data_update_frequency', dataset_dict)
# modified = dataset_dict.get('metadata_modified')
try:
# self.modified = FreshnessCalculator.dataset_last_change_date(dataset_dict)
self.modified = FreshnessCalculator.end_of_dataset_date(dataset_dict)
dataset_date = get_extra_from_dataset('dataset_date', dataset_dict)
self.modified = FreshnessCalculator.end_of_dataset_date(dataset_date)
if self.modified and update_freq:
# if self.modified and update_freq and UPDATE_FREQ_OVERDUE_INFO.get(update_freq):
# if '.' not in modified:
# modified += '.000'
# self.modified = datetime.datetime.strptime(modified, "%Y-%m-%dT%H:%M:%S.%f")
# self.extra_overdue_days = UPDATE_FREQ_OVERDUE_INFO.get(update_freq)
# self.extra_delinquent_days = UPDATE_FREQ_DELINQUENT_INFO[update_freq]
self.update_freq_in_days = int(update_freq)
self.surely_not_fresh = False
except Exception as e:
Expand Down

0 comments on commit 8c42b73

Please sign in to comment.