update metadata to track search settings

TomDonoghue committed Aug 31, 2023
1 parent 640fb7e commit 31e6226
Showing 3 changed files with 36 additions and 12 deletions.
13 changes: 9 additions & 4 deletions lisc/collect/counts.py
@@ -86,8 +86,16 @@ def collect_counts(terms_a, inclusions_a=None, exclusions_a=None, labels_a=None,
     ... terms_b=[['attention'], ['perception']])
     """
 
+    # Initialize meta data object
+    meta_data = MetaData()
+
+    # Collect settings for URLs, and add them to the metadata object
+    settings = {'db' : db, 'field' : field}
+    settings.update(eutils_kwargs)
+    meta_data.add_settings(settings)
+
     # Get e-utils URLS object. Set retmax as 0, since not using UIDs for counts
-    urls = EUtils(db=db, retmax='0', field=field, retmode='xml', **eutils_kwargs, api_key=api_key)
+    urls = EUtils(**settings, retmax='0', retmode='xml', api_key=api_key)
 
     # Define the settings for the search utility, adding a default for datetype if not provided
     search_settings = ['db', 'retmax', 'retmode', 'field']
@@ -98,9 +106,6 @@ def collect_counts(terms_a, inclusions_a=None, exclusions_a=None, labels_a=None,
     urls.build_url('info', settings=['db'])
     urls.build_url('search', settings=search_settings + list(eutils_kwargs.keys()))
 
-    # Initialize meta data object
-    meta_data = MetaData()
-
     # Check for a Requester object to be passed in as logging, otherwise initialize
     req = logging if isinstance(logging, Requester) else \
         Requester(wait_time=get_wait_time(urls.authenticated),
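The counts change above follows one pattern: assemble the e-utilities settings as a single dict, store that dict on the metadata object, then unpack the same dict into the EUtils constructor, so the recorded settings and the URLs built from them cannot drift apart. Below is a minimal stand-alone sketch of that pattern; record_settings and build_search_url are hypothetical stand-ins for MetaData.add_settings and EUtils (not LISC code), and the parameter values are illustrative.

# Stand-alone sketch of the shared-settings pattern; these helpers are
# hypothetical stand-ins for MetaData.add_settings and EUtils, not LISC code.

recorded = {}

def record_settings(settings):
    """Stand-in for MetaData.add_settings: keep a copy of the settings used."""
    recorded['settings'] = dict(settings)

def build_search_url(db, field, retmax, retmode, **extra):
    """Stand-in for EUtils: turn keyword settings into an esearch URL."""
    params = {'db': db, 'field': field, 'retmax': retmax, 'retmode': retmode, **extra}
    base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'
    return base + '&'.join(f'{key}={val}' for key, val in params.items())

# Build the settings once, record them, then unpack the same dict into the URL
eutils_kwargs = {'datetype': 'pdat'}
settings = {'db': 'pubmed', 'field': 'TIAB'}
settings.update(eutils_kwargs)
record_settings(settings)

url = build_search_url(**settings, retmax='0', retmode='xml')
print(recorded['settings'])   # the same settings that were used to build the URL
print(url)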
19 changes: 12 additions & 7 deletions lisc/collect/words.py
@@ -80,9 +80,17 @@ def collect_words(terms, inclusions=None, exclusions=None, labels=None,
         msg = 'Only the `pubmed` database is currently supported for words collection.'
         raise NotImplementedError(msg)
 
+    # Initialize meta data object
+    meta_data = MetaData()
+
+    # Collect settings for URLs, and add them to the metadata object
+    settings = {'db' : db, 'retmax' : retmax, 'field' : field,
+                'usehistory' : 'y' if usehistory else 'n'}
+    settings.update(eutils_kwargs)
+    meta_data.add_settings(settings)
+
     # Get EUtils URLS object, with desired settings, and build required utility URLs
-    urls = EUtils(db=db, retmax=retmax, usehistory='y' if usehistory else 'n',
-                  field=field, retmode='xml', **eutils_kwargs, api_key=api_key)
+    urls = EUtils(**settings, retmode='xml', api_key=api_key)
 
     # Define the settings for the search utility, adding a default for datetype if not provided
     search_settings = ['db', 'usehistory', 'retmax', 'retmode', 'field']
@@ -94,10 +102,6 @@ def collect_words(terms, inclusions=None, exclusions=None, labels=None,
     urls.build_url('search', settings=search_settings + list(eutils_kwargs.keys()))
     urls.build_url('fetch', settings=['db', 'retmode'])
 
-    # Initialize results & meta data
-    results = []
-    meta_data = MetaData()
-
     # Check for a Requester object to be passed in as logging, otherwise initialize
     req = logging if isinstance(logging, Requester) else \
         Requester(wait_time=get_wait_time(urls.authenticated),
@@ -111,7 +115,8 @@ def collect_words(terms, inclusions=None, exclusions=None, labels=None,
     inclusions = inclusions if inclusions else [[]] * len(terms)
     exclusions = exclusions if exclusions else [[]] * len(terms)
 
-    # Loop through all the terms
+    # Loop through all the terms, launch collection, and collect results
+    results = []
     for label, search, incl, excl in zip(labels, terms, inclusions, exclusions):
 
         # Collect term information and make search term argument
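With this change, the exact search settings used for a words collection are preserved on the metadata object rather than being known only to the EUtils URLs. As a hedged illustration, the snippet below mirrors the settings dict assembled inside collect_words above; the values chosen for db, retmax, field, and usehistory are assumptions for the sketch, not taken from the function signature.

# Illustrative only: mirrors the settings dict assembled in collect_words above.
# The values for db / retmax / field / usehistory are assumed for this sketch.
db, retmax, field, usehistory = 'pubmed', 250, 'TIAB', False
eutils_kwargs = {'datetype': 'edat'}   # any extra e-utilities keyword arguments

settings = {'db': db, 'retmax': retmax, 'field': field,
            'usehistory': 'y' if usehistory else 'n'}
settings.update(eutils_kwargs)

# This is the dict stored via meta_data.add_settings(settings),
# and also the dict unpacked into EUtils(**settings, ...)
print(settings)
# {'db': 'pubmed', 'retmax': 250, 'field': 'TIAB', 'usehistory': 'n', 'datetype': 'edat'}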
16 changes: 15 additions & 1 deletion lisc/data/meta_data.py
@@ -17,6 +17,8 @@ class MetaData():
         Details of the requester object used for the data collection.
     db_info : dict
         Details of the database from which the data was accessed.
+    settings : dict
+        Details of any search settings that were used during the collection.
     log : list or None
         A log of requested URLs, if requests were logged.
     """
@@ -27,6 +29,7 @@ def __init__(self):
         self.date = None
         self.requester = None
         self.db_info = None
+        self.settings = None
         self.log = None
 
         self.get_date()
@@ -47,7 +50,7 @@ def as_dict(self):
         mt_dict = deepcopy(self.__dict__)
 
         # Unpack dictionary attributes to flatten dictionary
-        for label in ['requester', 'db_info']:
+        for label in ['requester', 'db_info', 'settings']:
             attr = mt_dict.pop(label)
             if attr:
                 for key, val in attr.items():
@@ -92,3 +95,14 @@ def add_db_info(self, db_info):
         """
 
         self.db_info = db_info
+
+    def add_settings(self, settings):
+        """Add search settings information to the MetaData object.
+
+        Parameters
+        ----------
+        settings : dict
+            Information about settings that were used during the data collection.
+        """
+
+        self.settings = settings
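The as_dict change means that, when the metadata is exported, the recorded settings are flattened into top-level keys alongside the requester and database details. Below is a minimal stand-alone reproduction of that flattening; the example values are made up, and the truncated loop body is assumed to copy each nested key into the flat dict.

from copy import deepcopy

# Stand-alone reproduction of the as_dict flattening shown above; the example
# values are made up, and the loop body (truncated in the diff) is assumed to
# copy each nested key into the flat dict.
meta = {'date': '2023-08-31', 'requester': None,
        'db_info': {'dbname': 'pubmed'},
        'settings': {'db': 'pubmed', 'field': 'TIAB', 'usehistory': 'n'},
        'log': None}

mt_dict = deepcopy(meta)
for label in ['requester', 'db_info', 'settings']:
    attr = mt_dict.pop(label)
    if attr:
        for key, val in attr.items():
            mt_dict[key] = val

print(mt_dict)
# {'date': '2023-08-31', 'log': None, 'dbname': 'pubmed',
#  'db': 'pubmed', 'field': 'TIAB', 'usehistory': 'n'}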
