Change warehouse raw data endpoint to respond with JSON text sequence. #1858

Merged · 4 commits · May 30, 2024
67 changes: 26 additions & 41 deletions classes/Rest/Controllers/WarehouseControllerProvider.php
@@ -2108,20 +2108,20 @@ private function getUserStore(\XDUser $user, $realm)
*
* It can also contain the following optional parameters:
* - fields: list of aliases of fields to get (if not provided, all
* fields are gotten).
* fields are obtained).
* - filters: mapping of dimension names to their possible values.
* Results will only be included whose values for each of the
* given dimensions match one of the corresponding given values.
* - offset: starting row index of data to get.
*
* If successful, the response will include the following keys:
* - success: true.
* - fields: array containing the 'display' property of each field gotten.
* - data: array of arrays containing the field values gotten.
* If successful, the response will be a JSON text sequence. The first line
* will be an array containing the `display` property of each obtained
* field. Subsequent lines will be arrays containing the obtained field
* values for each record.
*
* @param Request $request
* @param Application $app
* @return \Symfony\Component\HttpFoundation\JsonResponse
* @return \Symfony\Component\HttpFoundation\StreamedResponse
* @throws BadRequestHttpException if any of the required parameters are
* not included; if an invalid start date,
* end date, realm, field alias, or filter
@@ -2145,7 +2145,6 @@ public function getRawData(Request $request, Application $app)
$reachedOffset = false;
$i = 1;
$offset = $params['offset'];
$echoedFirstRow = false;
// Jobs realm has a performance improvement by querying one day at
// a time.
if ('Jobs' === $params['realm']) {
@@ -2162,8 +2161,7 @@
$logger,
$reachedOffset,
$i,
$offset,
$echoedFirstRow
$offset
);
$currentDate = date(
'Y-m-d',
@@ -2184,15 +2182,14 @@
$logger,
$reachedOffset,
$i,
$offset,
$echoedFirstRow
$offset
);
}
};
return $app->stream(
$streamCallback,
200,
['Content-Type' => 'application/json']
['Content-Type' => 'application/json-seq']
);
}

@@ -2246,15 +2243,14 @@ private function getRawDataLogger()
}

/**
* Perform an unbuffered database query and echo the result as JSON, flushing every 10000 rows.
* Perform an unbuffered database query and echo the result as a JSON text sequence, flushing every 10000 rows.
*
* @param string $queryClass the fully qualified name of the query class.
* @param string $startDate the start date of the query in ISO 8601 format.
* @param string $endDate the end date of the query in ISO 8601 format.
* @param bool $isFirstQueryInSeries if true, echo the JSON prolog before echoing the data. Otherwise, just echo
* the data.
* @param bool $isLastQueryInSeries if true, echo the JSON epilog after echoing the data. Otherwise, just echo
* the data.
* @param bool $isFirstQueryInSeries if true, echo an array with the `display` header of each field before
* echoing the data.
* @param bool $isLastQueryInSeries if true, switch back to MySQL buffered query mode after echoing the last row.
* @param array $params validated parameter values from @see validateRawDataParams().
* @param XDUser $user the user making the request.
* @param \CCR\Logger $logger used to log the database request.
@@ -2264,8 +2260,6 @@ private function getRawDataLogger()
* @param int $i the number of rows iterated so far plus one — used to keep track of whether the offset has been
* reached and when to flush.
* @param int $offset the number of rows to ignore before echoing.
* @param bool $echoedFirstRow if true, the first row has already been echoed, so echo a comma before the next
* one. Otherwise, don't echo the comma.
* @return null
* @throws Exception if $startDate or $endDate are invalid ISO 8601 dates, if there is an error connecting to
* or querying the database, or if invalid fields have been specified in the query parameters.
@@ -2281,8 +2275,7 @@ private function echoRawData(
$logger,
&$reachedOffset,
&$i,
&$offset,
&$echoedFirstRow
&$offset
) {
$query = new $queryClass(
[
@@ -2301,19 +2294,12 @@
$pdo = DB::factory($query->_db_profile)->handle();
if ($isFirstQueryInSeries) {
$pdo->setAttribute(PDO::MYSQL_ATTR_USE_BUFFERED_QUERY, false);
echo '{"success":true,"fields":'
. json_encode($dataset->getHeader())
. ',"data":[';
echo "\036" . json_encode($dataset->getHeader()) . "\n";
}
foreach ($dataset as $row) {
if ($reachedOffset || $i > $offset) {
$reachedOffset = true;
if ($echoedFirstRow) {
echo ',';
}
echo "\n";
echo json_encode($row);
$echoedFirstRow = true;
echo "\036" . json_encode($row) . "\n";
}
if (10000 === $i) {
ob_flush();
@@ -2326,7 +2312,6 @@
$i++;
}
if ($isLastQueryInSeries) {
echo ']}';
$pdo->setAttribute(PDO::MYSQL_ATTR_USE_BUFFERED_QUERY, true);
}
}
@@ -2339,7 +2324,7 @@
* @param \DataWarehouse\Query\RawQuery $query
* @param \CCR\Logger
* @return BatchDataset
* @throws Exception if the 'fields' parameter contains invalid field
* @throws Exception if the `fields` parameter contains invalid field
* aliases.
*/
private function getRawBatchDataset(
@@ -2366,7 +2351,7 @@ private function getRawBatchDataset(
}

/**
* Validate the 'start_date' and 'end_date' parameters of the given request
* Validate the `start_date` and `end_date` parameters of the given request
* to the raw data endpoint (@see getRawData()).
*
* @param Request $request
@@ -2399,8 +2384,8 @@ private function validateRawDataDateParams($request)

/**
* Get the array of field aliases from the given request to the raw data
* endpoint (@see getRawData()), e.g., the parameter 'fields=foo,bar,baz'
* results in ['foo', 'bar', 'baz'].
* endpoint (@see getRawData()), e.g., the parameter `fields=foo,bar,baz`
* results in `['foo', 'bar', 'baz']`.
*
* @param Request $request
* @return array|null containing the field aliases parsed from the request,
Expand All @@ -2417,9 +2402,9 @@ private function getRawDataFieldsArray($request)
}

/**
* Validate the optional 'filters' parameter of the given request to the
* Validate the optional `filters` parameter of the given request to the
* raw data endpoint (@see getRawData()), e.g., the parameter
* 'filters[foo]=bar,baz' results in ['foo' => ['bar', 'baz']].
* `filters[foo]=bar,baz` results in `['foo' => ['bar', 'baz']]`.
*
* @param Request $request
* @param array $queryDescripters the set of dimensions the user is
@@ -2454,7 +2439,7 @@ private function validateRawDataFiltersParams($request, $queryDescripters)
* dimension does not match any of the provided values.
*
* @param \DataWarehouse\Query\RawQuery $query
* @param array $params containing a 'filters' key whose value is an
* @param array $params containing a `filters` key whose value is an
* associative array of dimensions and dimension
* values.
* @return \DataWarehouse\Query\RawQuery the query with the filters
@@ -2481,10 +2466,10 @@ private function setRawDataQueryFilters($query, $params)
}

/**
* Validate a specific filter from the 'filters' parameter of a request to
* Validate a specific filter from the `filters` parameter of a request to
* the raw data endpoint (@see getRawData()), and return the parsed array
* of values for that filter (e.g., 'foo,bar,baz' becomes ['foo', 'bar',
* 'baz']).
* of values for that filter (e.g., `foo,bar,baz` becomes `['foo', 'bar',
* 'baz']`).
*
* @param Request $request
* @param array $queryDescripters the set of dimensions the user is
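A note for readers of this diff on consuming the new format (this sketch is illustrative and not part of the change itself): each record is emitted as an ASCII Record Separator (0x1E), a JSON array, and a Line Feed, matching the `echo "\036" . json_encode(...) . "\n"` calls in echoRawData() above and the RFC 7464 framing implied by the `application/json-seq` content type. A minimal client-side consumer in PHP, assuming the response body has already been saved to a file named `response.json-seq`, could look like this:

```php
<?php
// Minimal json-seq consumer sketch (hypothetical file name; transport is up to the caller).
$stream = fopen('response.json-seq', 'r');
$header = null;
while (($line = fgets($stream)) !== false) {
    // Strip the leading Record Separator (0x1E) and the trailing Line Feed.
    $record = json_decode(trim($line, "\x1E\n"), true);
    if ($record === null) {
        continue; // skip blank or malformed lines
    }
    if ($header === null) {
        // First record: the 'display' labels of the requested fields.
        $header = $record;
        continue;
    }
    // Subsequent records: the field values for one row.
    $row = array_combine($header, $record);
    // ... process $row ...
}
fclose($stream);
```

Because each record is a complete JSON text, a consumer can process rows as they stream in rather than buffering the whole former `{"success": ..., "fields": ..., "data": [...]}` envelope, which is also why the comma and prolog/epilog bookkeeping (`$echoedFirstRow`, the trailing `']}'`) could be removed.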
52 changes: 4 additions & 48 deletions docs/xdmod-rest-schema.json
@@ -1933,7 +1933,7 @@ layout: null

}
},
"404": { "$ref": "api-token-404" }
"404": { "$ref": "#/components/responses/api-token-404" }
}
},
"post": {
@@ -2111,7 +2111,7 @@
}
}
},
"404": { "$ref": "api-token-404" },
"404": { "$ref": "#/components/responses/api-token-404" },
"500": {
"description": "Internal Server Error",
"content": {
@@ -2857,7 +2857,7 @@ layout: null
"Data Analytics Framework"
],
"summary": "Get raw data from the data warehouse",
"description": "",
"description": "On success, returns a stream of content type `application/json-seq` containing one line with an array containing the `display` property of each obtained field followed by one line for each obtained record with an array containing the obtained field values for that record. Each line starts with an ASCII Record Separator character and ends with an ASCII Line Feed character.",
"operationId": "get-warehouse-raw-data",
"security": [
{ "http_bearer": [] },
@@ -2916,51 +2916,7 @@ layout: null
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"type": "object",
"title": "success",
"properties": {
"success": {
"type": "boolean",
"enum": [ true ]
},
"fields": {
"type": "array",
"description": "Labels of fields returned",
"items": {
"type": "string"
}
},
"data": {
"type": "array",
"description": "Rows of raw data",
"items": {
"type": "array",
"items": {
"type": "string",
"nullable": true
}
}
}
},
"required": [
"success",
"fields",
"data"
]
},
"examples": {
"success": {
"description": "Parameters: `start_date=2022-01-01&end_date=2022-01-01&realm=SUPREMM&fields=nodes,cores`",
"value": {
"success": true,
"fields": [ "Cores", "Nodes" ],
"data": [ ["1", "1"], ["5", "1"], ["2304", "24"] ]
}
}
}
}
"application/json-seq": {}
}
},
"400": {
@@ -1 +1,5 @@
{"success":true,"fields":["Instance ID","PI Group","Instance Type"],"data":[["172df8e9-b2ec-4864-898d-a4d401de85f0","Polar Ocean and Climate Systems","m1.small"],["4d5e602e-97b7-4839-8818-097d6bcc8e62","Polar Ocean and Climate Systems","m1.small"],["dde9e2fb-a519-4299-a35f-8a096a46404c","Galactic Astronomy","m1.medium"],["b9c560b9-1066-483c-895e-aebb8b3c7ac1","Polar Ocean and Climate Systems","m1.medium"]]}
["Instance ID","PI Group","Instance Type"]
["172df8e9-b2ec-4864-898d-a4d401de85f0","Polar Ocean and Climate Systems","m1.small"]
["4d5e602e-97b7-4839-8818-097d6bcc8e62","Polar Ocean and Climate Systems","m1.small"]
["dde9e2fb-a519-4299-a35f-8a096a46404c","Galactic Astronomy","m1.medium"]
["b9c560b9-1066-483c-895e-aebb8b3c7ac1","Polar Ocean and Climate Systems","m1.medium"]
