Skip to content

Commit

Permalink
Use streaming for raw data requests.
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronweeden committed Apr 4, 2024
1 parent 63141a0 commit 74af2f0
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Main development branch
- Update tests and testing instructions ([\#14](https://github.com/ubccr/xdmod-data/pull/14)).
- Use streaming for raw data requests ([\#19](https://github.com/ubccr/xdmod-data/pull/19)).

## v1.0.0 (2023-07-21)
- Initial release.
78 changes: 60 additions & 18 deletions xdmod_data/_http_requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,53 @@ def _request_data(self, params):

def _request_raw_data(self, params):
    """Request raw data rows from the XDMoD raw-data REST endpoint.

    Two retrieval strategies are supported, chosen from the value returned
    by `__get_raw_data_limit()`:

    * `'NA'`: the whole response is streamed line by line from a single
      request and parsed as one JSON document once the stream ends.
    * an integer: the data are fetched page by page, adding an `offset`
      URL parameter that advances by `limit` until a page comes back with
      a number of rows different from `limit`.

    Parameters
    ----------
    params : dict
        Request parameters. They are passed to
        `__get_raw_data_url_params()` to build the query string, and
        `params['show_progress']` controls whether a progress message is
        printed (rewritten in place using a carriage return).

    Returns
    -------
    tuple
        `(data, fields)`, where `data` is the `'data'` value of the
        response (all pages concatenated in paged mode) and `fields` is
        the `'fields'` value of the last response received.

    Raises
    ------
    json.JSONDecodeError
        In streaming mode, if the streamed body is not valid JSON (this
        includes an empty body).
    """
    url_params = self.__get_raw_data_url_params(params)
    show_progress = params['show_progress']

    def make_progress_msg(num_rows):
        return (
            'Got ' + str(num_rows) + ' row' + ('' if num_rows == 1 else 's')
            + '...'
        )

    # Once XDMoD 10.5 is no longer supported, there will be no need to call
    # __get_raw_data_limit(), and the if/else statement below will not be
    # necessary — only the body of the 'if' branch will be needed.
    limit = self.__get_raw_data_limit()
    data = []
    if limit == 'NA':
        response_iter_lines = self.__request(
            path='/rest/v1/warehouse/raw-data?' + url_params,
            post_fields=None,
            stream=True,
        )
        # Collect the decoded lines and join them once at the end rather
        # than growing a string with '+=' on every line.
        chunks = []
        # Bound before the loop so that the final 'DONE' message can still
        # be printed if the stream yields no lines at all.
        progress_msg = make_progress_msg(0)
        for i, line in enumerate(response_iter_lines):
            chunks.append(line.decode('utf-8'))
            if show_progress:
                # The count shown is the zero-based index of the line just
                # received (presumably because the first line is not a
                # data row -- confirm against the server's output format).
                progress_msg = make_progress_msg(i)
                print(progress_msg, end='\r')
        if show_progress:
            print(progress_msg + 'DONE')
        response = json.loads(''.join(chunks))
        data = response['data']
    else:
        # Start with num_rows == limit so that the first page is always
        # requested; stop after the first page that is not full.
        num_rows = limit
        offset = 0
        while num_rows == limit:
            response = self._request_json(
                path='/rest/v1/warehouse/raw-data?' + url_params
                + '&offset=' + str(offset)
            )
            partial_data = response['data']
            data += partial_data
            if show_progress:
                progress_msg = make_progress_msg(len(data))
                print(progress_msg, end='\r')
            num_rows = len(partial_data)
            offset += limit
        if show_progress:
            print(progress_msg + 'DONE')
    return (data, response['fields'])

def _request_json(self, path, post_fields=None):
Expand All @@ -75,7 +104,7 @@ def __assert_connection_to_xdmod_host(self):
+ '\': ' + str(e)
) from None

def __request(self, path='', post_fields=None):
def __request(self, path='', post_fields=None, stream=False):
_validator._assert_runtime_context(self.__in_runtime_context)
url = self.__xdmod_host + path
if post_fields:
Expand Down Expand Up @@ -103,7 +132,10 @@ def __request(self, path='', post_fields=None):
raise RuntimeError(
'Error ' + str(response.status_code) + msg
) from None
return response.text
if stream:
return response.iter_lines()
else:
return response.text

def __get_data_post_fields(self, params):
post_fields = {
Expand Down Expand Up @@ -138,8 +170,18 @@ def __get_raw_data_url_params(self, params):
)
return urlencode(results)

# Once XDMoD 10.5 is no longer supported, there will be no need for this
# method.
def __get_raw_data_limit(self):
    """Return the server's raw-data page size, or 'NA' if it has none.

    The value is looked up at most once per instance and cached in
    `self.__raw_data_limit`.

    Returns
    -------
    int or str
        The integer limit reported by the
        `/rest/v1/warehouse/raw-data/limit` endpoint, or the string
        `'NA'` if requesting that endpoint fails with an HTTP 404 error.

    Raises
    ------
    RuntimeError
        If the request fails with any error other than HTTP 404.
    """
    if self.__raw_data_limit is not None:
        return self.__raw_data_limit
    try:
        response = self._request_json(
            '/rest/v1/warehouse/raw-data/limit'
        )
    except RuntimeError as e:
        # HTTP errors are raised as RuntimeError('Error <status code>...'),
        # so match on that prefix instead of looking for '404' anywhere in
        # the message, which could also match unrelated errors whose text
        # happens to contain '404'.
        if not str(e).startswith('Error 404'):
            raise
        self.__raw_data_limit = 'NA'
    else:
        self.__raw_data_limit = int(response['data'])
    return self.__raw_data_limit

0 comments on commit 74af2f0

Please sign in to comment.