diff --git a/CHANGELOG.md b/CHANGELOG.md index c53c6856..fc58ebca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # xdmod-data Changelog ## Main development branch +- Use streaming for raw data requests ([\#19](https://github.com/ubccr/xdmod-data/pull/19)) - Update tests and testing instructions ([\#14](https://github.com/ubccr/xdmod-data/pull/14)) ## v1.0.0 (2023-07-21) diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index 5b9009ad..32553816 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -42,24 +42,53 @@ def _request_data(self, params): def _request_raw_data(self, params): url_params = self.__get_raw_data_url_params(params) + # Once XDMoD 10.5 is no longer supported, there will be no need to call + # __get_raw_data_limit(), and the if/else statement below will not be + # necessary — only the body of the 'if' branch will be needed. limit = self.__get_raw_data_limit() data = [] - num_rows = limit - offset = 0 - while num_rows == limit: - response = self._request_json( - path='/rest/v1/warehouse/raw-data?' + url_params - + '&offset=' + str(offset) + if limit == 'NA': + response_iter_lines = self.__request( + path='/rest/v1/warehouse/raw-data?' + url_params, + post_fields=None, + stream=True, ) - partial_data = response['data'] - data += partial_data + response_text = '' + i = 0 + for line in response_iter_lines: + response_text += line.decode('utf-8') + if params['show_progress']: + progress_msg = ( + 'Got ' + str(i) + ' row' + ('' if i == 1 else 's') + + '...' + ) + print(progress_msg, end='\r') + i += 1 if params['show_progress']: - progress_msg = 'Got ' + str(len(data)) + ' rows...' - print(progress_msg, end='\r') - num_rows = len(partial_data) - offset += limit - if params['show_progress']: - print(progress_msg + 'DONE') + print(progress_msg + 'DONE') + response = json.loads(response_text) + data = response['data'] + else: + num_rows = limit + offset = 0 + while num_rows == limit: + response = self._request_json( + path='/rest/v1/warehouse/raw-data?' + url_params + + '&offset=' + str(offset) + ) + partial_data = response['data'] + data += partial_data + if params['show_progress']: + progress_msg = ( + 'Got ' + str(len(data)) + ' row' + + ('' if len(data) == 1 else 's') + + '...' + ) + print(progress_msg, end='\r') + num_rows = len(partial_data) + offset += limit + if params['show_progress']: + print(progress_msg + 'DONE') return (data, response['fields']) def _request_json(self, path, post_fields=None): @@ -75,7 +104,7 @@ def __assert_connection_to_xdmod_host(self): + '\': ' + str(e) ) from None - def __request(self, path='', post_fields=None): + def __request(self, path='', post_fields=None, stream=False): _validator._assert_runtime_context(self.__in_runtime_context) url = self.__xdmod_host + path if post_fields: @@ -103,7 +132,10 @@ def __request(self, path='', post_fields=None): raise RuntimeError( 'Error ' + str(response.status_code) + msg ) from None - return response.text + if stream: + return response.iter_lines() + else: + return response.text def __get_data_post_fields(self, params): post_fields = { @@ -138,8 +170,16 @@ def __get_raw_data_url_params(self, params): ) return urlencode(results) + # Once XDMoD 10.5 is no longer supported, there will be no need for this + # method. def __get_raw_data_limit(self): if self.__raw_data_limit is None: - response = self._request_json('/rest/v1/warehouse/raw-data/limit') - self.__raw_data_limit = int(response['data']) + try: + response = self._request_json( + '/rest/v1/warehouse/raw-data/limit' + ) + self.__raw_data_limit = int(response['data']) + except RuntimeError as e: + if '404' in str(e): + self.__raw_data_limit = 'NA' return self.__raw_data_limit