Skip to content

Commit

Permalink
Use streaming for raw data requests. (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronweeden authored Jun 5, 2024
1 parent 82edbdc commit ac99530
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Add a "Feedback / Feature Requests" section to the README ([\#22](https://github.com/ubccr/xdmod-notebooks/pull/22)).
- Improve performance of validation of filters and raw fields ([\#18](https://github.com/ubccr/xdmod-data/pull/18)).
- Fix bug with trailing slashes in `xdmod_host` ([\#24](https://github.com/ubccr/xdmod-data/pull/24)).
- Use streaming for raw data requests ([\#19](https://github.com/ubccr/xdmod-data/pull/19)).

## v1.0.0 (2023-07-21)
- Initial release.
80 changes: 62 additions & 18 deletions xdmod_data/_http_requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,55 @@ def _request_data(self, params):

def _request_raw_data(self, params):
url_params = self.__get_raw_data_url_params(params)
# Once XDMoD 10.5 is no longer supported, there will be no need to call
# __get_raw_data_limit(), and the if/else statement below will not be
# necessary — only the body of the 'if' branch will be needed.
limit = self.__get_raw_data_limit()
data = []
num_rows = limit
offset = 0
while num_rows == limit:
response = self._request_json(
path='/rest/v1/warehouse/raw-data?' + url_params
+ '&offset=' + str(offset)
if limit == 'NA':
response_iter_lines = self.__request(
path='/rest/v1/warehouse/raw-data?' + url_params,
post_fields=None,
stream=True,
)
partial_data = response['data']
data += partial_data
i = 0
for line in response_iter_lines:
line_text = line.decode('utf-8').replace('\x1e', '')
line_json = json.loads(line_text)
if i == 0:
response = {'fields': line_json}
else:
data.append(line_json)
if params['show_progress']:
progress_msg = (
'Got ' + str(i) + ' row' + ('' if i == 1 else 's')
+ '...'
)
print(progress_msg, end='\r')
i += 1
if params['show_progress']:
progress_msg = 'Got ' + str(len(data)) + ' rows...'
print(progress_msg, end='\r')
num_rows = len(partial_data)
offset += limit
if params['show_progress']:
print(progress_msg + 'DONE')
print(progress_msg + 'DONE')
else:
num_rows = limit
offset = 0
while num_rows == limit:
response = self._request_json(
path='/rest/v1/warehouse/raw-data?' + url_params
+ '&offset=' + str(offset)
)
partial_data = response['data']
data += partial_data
if params['show_progress']:
progress_msg = (
'Got ' + str(len(data)) + ' row'
+ ('' if len(data) == 1 else 's')
+ '...'
)
print(progress_msg, end='\r')
num_rows = len(partial_data)
offset += limit
if params['show_progress']:
print(progress_msg + 'DONE')
return (data, response['fields'])

def _request_filter_values(self, realm_id, dimension_id):
Expand Down Expand Up @@ -98,7 +129,7 @@ def __assert_connection_to_xdmod_host(self):
+ '\': ' + str(e)
) from None

def __request(self, path='', post_fields=None):
def __request(self, path='', post_fields=None, stream=False):
_validator._assert_runtime_context(self.__in_runtime_context)
url = self.__xdmod_host + path
if post_fields:
Expand Down Expand Up @@ -126,7 +157,10 @@ def __request(self, path='', post_fields=None):
raise RuntimeError(
'Error ' + str(response.status_code) + msg
) from None
return response.text
if stream:
return response.iter_lines()
else:
return response.text

def __get_data_post_fields(self, params):
post_fields = {
Expand Down Expand Up @@ -161,8 +195,18 @@ def __get_raw_data_url_params(self, params):
)
return urlencode(results)

# Once XDMoD 10.5 is no longer supported, there will be no need for this
# method.
def __get_raw_data_limit(self):
if self.__raw_data_limit is None:
response = self._request_json('/rest/v1/warehouse/raw-data/limit')
self.__raw_data_limit = int(response['data'])
try:
response = self._request_json(
'/rest/v1/warehouse/raw-data/limit'
)
self.__raw_data_limit = int(response['data'])
except RuntimeError as e:
if '404' in str(e):
self.__raw_data_limit = 'NA'
else:
raise
return self.__raw_data_limit

0 comments on commit ac99530

Please sign in to comment.