From b3b74de917dd49c4b708ba4aa0ddd2c830d6d10d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 10 Sep 2024 19:42:58 -0400 Subject: [PATCH 01/75] [wip] update docs about prefer header vs mode/transmissionMode/response body parameters (relates to #376, #414, #701, https://github.com/opengeospatial/ogcapi-processes/issues/412) --- docs/source/configuration.rst | 1 + docs/source/processes.rst | 140 +++++++++++++++++++++++----------- docs/source/references.rst | 12 ++- weaver/utils.py | 2 +- 4 files changed, 107 insertions(+), 48 deletions(-) diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index a4ad6f14d..9f818b2cb 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -404,6 +404,7 @@ they are optional and which default value or operation is applied in each situat .. versionadded:: 4.15 .. versionchanged:: 4.34 +.. |weaver-execute-sync-max-wait| replace:: ``weaver.execute_sync_max_wait`` .. _weaver-execute-sync-max-wait: - | ``weaver.execute_sync_max_wait = `` [:class:`int`, seconds] diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 0e59561cd..ffd42cc1c 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -579,11 +579,14 @@ Execution of a process (Execute) For backward compatibility, the |exec-req-job|_ request is also supported as alias to the above :term:`OGC API - Processes` compliant endpoint. -This section will first describe the basics of this request format, and after go into details for specific use cases -and parametrization of various input/output combinations. Let's employ the following example of JSON body sent to the -:term:`Job` execution to better illustrate the requirements. +This section will first describe the basics of this request format (:ref:`proc_exec_body`), and after go into +further details for specific use cases and parametrization of various input/output combinations. -.. 
table:: +Below are some examples of :term:`JSON` body that can be sent to the :term:`Job` execution endpoint to +better illustrate where each of the mentioned parameters in following section are expected. + +.. table:: Example Job Execution Request Body + :name: table-exec-body :class: code-table :align: center @@ -596,8 +599,8 @@ and parametrization of various input/output combinations. Let's employ the follo | "response": "document", | "response": "document", | | "inputs": [ | "inputs": { | | { | "input-file": { | - | "id": "input-file", | "href": "" | + | "href": "" | }, | | }, | "input-value": { | | { | "value": 1 | | "id": "input-value", | } | @@ -623,17 +626,18 @@ and parametrization of various input/output combinations. Let's employ the follo .. note:: Other parameters can be added to the request to provide further functionalities. Above fields are the minimum - requirements to request a :term:`Job`. Please refer to the |exec-api|_ definition for all applicable features. + requirements to request a :term:`Job`. Please refer to the |exec-api|_ definition, as well as following sections, + for all applicable features. .. seealso:: - - :ref:`proc_exec_body` and :ref:`proc_exec_mode` details applicable for `Weaver` specifically. - - `OGC API - Processes, Process Outputs `_ - for more general details on ``transmissionMode`` parameter. - - `OGC API - Processes, Execution Mode `_ - for more general details on the execution negotiation (formerly with ``mode`` parameter) and more recently - with ``Prefer`` header. - - |ogc-exec-sync-responses|_ and |ogc-exec-async-responses|_ - for a complete listing of available ``response`` formats considering all other parameters. + - :ref:`proc_exec_body`, :ref:`proc_exec_mode` and :ref:`proc_exec_response` sections provide details + applicable to `Weaver`, which align with :term:`OGC API - Processes`, but that can also support additional + capabilities. 
+ - |ogc-api-proc-exec-outputs|_ offers general details on ``transmissionMode`` parameter of requested outputs. + - |ogc-api-proc-exec-mode|_ describes general details about the execution negotiation (`sync`/`async`), + formerly with ``mode`` parameter, and more recently with ``Prefer`` header. + - |ogc-api-proc-exec-responses-sync|_ and |ogc-api-proc-exec-responses-async|_ provide + a complete listing of available ``response`` formats considering all other parameters. .. |exec-api| replace:: OpenAPI Execute .. _exec-api: `exec-req`_ @@ -656,34 +660,42 @@ required when submitting the execution request, even for a no-input process (an It defines which parameters to forward to the referenced :term:`Process` to be executed. All ``id`` elements in this :term:`Job` request body must correspond to valid ``inputs`` from the definition returned by :ref:`DescribeProcess ` -response. Obviously, all formatting requirements (i.e.: proper file :term:`MIME-types`), +response. Obviously, all formatting requirements (i.e.: proper file :term:`Media-Types`), data types (e.g.: ``int``, ``string``, etc.) and validations rules (e.g.: ``minOccurs``, ``AllowedValues``, etc.) must also be fulfilled. When providing files as input, multiple protocols are supported. See later section :ref:`File Reference Types` for details. -The ``outputs`` section defines, for each ``id`` corresponding to the :term:`Process` definition, how to -report the produced outputs from a successful :term:`Job` completion. For the time being, `Weaver` only implement the -``reference`` result as this is the most common variation. In this case, the produced file is -stored locally and exposed externally with returned reference URL. The other mode ``value`` returns the contents -directly in the response instead of the URL. +The ``outputs`` section defines, for each ``id`` available from the :term:`Process` definition, how to +report the produced outputs from a successful :term:`Job` execution. 
The method under which each output will +be returned depends on the negotiated :ref:`proc_exec_mode` and :ref:`proc_exec_response`. + +When an output corresponds to a file produced by the :term:`Application Package`, and stored locally, the +result will typically (unless requested otherwise), be exposed externally using the returned reference :term:`URL`. +For outputs that correspond to literal data, such as plain strings or numbers, `Weaver` will typically prefer +returning the ``value`` directly. However, alternate link representations can also be obtained if specified in the +execution request. -When ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed should return all +When the ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed should return all outputs it offers in the created :ref:`Job Results `. In such case, because no representation modes -is specified for individual outputs, `Weaver` automatically selects ``reference`` as it makes all outputs more easily -accessible with distinct URL afterwards. If the ``outputs`` section is specified, but that one of the outputs defined -in the :ref:`Process Description ` is not specified, that output should be omitted from the produced -results. For the time being, because only ``reference`` representation is offered for produced output files, this -filtering is not implemented as it offers no additional advantage for files accessed directly with their distinct URLs. -This could be added later if ``Multipart`` raw data representation is required. -Please |submit-issue|_ to request this feature if it is relevant for your use-cases. +is specified for individual outputs, `Weaver` automatically selects ``reference`` for files as it makes all outputs +more easily accessible with distinct :term:`URL` afterwards, and ``value`` for literal data to obtain them directly.
+If the ``outputs`` section is specified, but that one of the ``outputs`` defined in +the :ref:`Process Description ` is not specified, this indicates that the :term:`Job` should +omit this output from the produced results. .. fixme: .. todo:: + For the time being, because only ``reference`` representation is offered for produced output files, this + filtering is not implemented as it offers no additional advantage for files accessed directly with their + distinct links. + This could be added later if ``Multipart`` raw data representation is required. + Please |submit-issue|_ to request this feature if it is relevant for your use-cases. + Filtering of ``outputs`` not implemented (everything always available). https://github.com/crim-ca/weaver/issues/380 Other parameters presented in the above examples, namely ``mode`` and ``response`` are further detailed in -the following :ref:`proc_exec_mode` section. +the following :ref:`proc_exec_mode` and :ref:`proc_exec_response` sections. .. _proc_exec_mode: @@ -694,8 +706,8 @@ In order to select how to execute a :term:`Process`, either `synchronously` or ` should be specified. If omitted, `Weaver` defaults to `asynchronous` execution. To execute `asynchronously` explicitly, ``Prefer: respond-async`` should be used. Otherwise, the `synchronous` execution can be requested with ``Prefer: wait=X`` where ``X`` is the duration in seconds to wait for a response. If no worker becomes available -within that time, or if this value is greater than ``weaver.exec_sync_max_wait``, the :term:`Job` will resume -`asynchronously` and the response will be returned. Furthermore, `synchronous` and `asynchronous` execution of +within that time, or if this value is greater than the |weaver-execute-sync-max-wait|_ setting, the :term:`Job` will +resume `asynchronously` and the response will be returned. 
Furthermore, `synchronous` and `asynchronous` execution of a :term:`Process` can only be requested for corresponding ``jobControlOptions`` it reports as supported in its :ref:`Process Description `. It is important to provide the ``jobControlOptions`` parameter with applicable modes when :ref:`Deploying a Process ` to allow it to run as desired. By default, `Weaver` @@ -713,19 +725,25 @@ will assume that deployed processes are only `asynchronous` to handle longer ope queue, as this allows `Weaver` to offer better availability for all requests submitted by its users. The `synchronous` mode should be reserved only for very quick and relatively low computation intensive operations. -The ``mode`` field displayed in the body is another method to tell whether to run the :term:`Process` in a blocking -(``sync``) or non-blocking (``async``) manner. Note that support is limited for mode ``sync`` as this use case is often -more cumbersome than ``async`` execution. Effectively, ``sync`` mode requires to have a task worker executor available -to run the :term:`Job` (otherwise it fails immediately due to lack of processing resource), and the requester must wait -for the *whole* execution to complete to obtain the result. Given that :term:`Process` could take a very long time to -complete, it is not practical to execute them in this manner and potentially have to wait hours to retrieve outputs. +.. fixme: +.. todo:: + Support the ``Prefer: handling=strict`` modifier to disallow switching between sync/async + https://github.com/crim-ca/weaver/issues/701 + +The ``mode`` field displayed in the :ref:`table-exec-body` is another method to tell whether to run the :term:`Process` +in a blocking (``sync``) or non-blocking (``async``) manner. Note that support is limited for mode ``sync`` as this use +case is often more cumbersome than ``async`` execution. 
Effectively, ``sync`` mode requires to have a task worker +executor available to run the :term:`Job` (otherwise it fails immediately due to lack of processing resource), and +the requester must wait for the *whole* execution to complete to obtain the result. +Given that :term:`Process` could take a very long time to complete, it is not practical to execute them in this +manner and potentially have to wait hours to retrieve outputs. Instead, the preferred and default approach is to request an ``async`` :term:`Job` execution. When doing so, `Weaver` will add this to a task queue for processing, and will immediately return a :term:`Job` identifier and ``Location`` where the user can probe for its status, using :ref:`Monitoring ` request. As soon as any task worker becomes available, it will pick any leftover queued :term:`Job` to execute it. .. note:: - The ``mode`` field is an older methodology that precedes the official :term:`OGC API - Processes` method using + The ``mode`` field is an older methodology that precedes the latest :term:`OGC API - Processes` method using the ``Prefer`` header. It is recommended to employ the ``Prefer`` header that ensures higher interoperability with other services using the same standard. The ``mode`` field is deprecated and preserved only for backward compatibility purpose. @@ -754,7 +772,35 @@ It is also possible that a ``failed`` :term:`Job`, even when `synchronous`, will to the status location instead of results. This is because it is impossible for `Weaver` to return the result(s) as outputs would not be generated by the incomplete :term:`Job`. -Finally, the ``response`` parameter defines how to return the results produced by the :term:`Process`. +For any of the execution combinations, it is always possible to obtain :term:`Job` outputs, +along with logs, exceptions and other details using the :ref:`proc_op_result` endpoints. + +.. 
_proc_exec_response: + +Execution Response +~~~~~~~~~~~~~~~~~~~~~~~~~ + +When requesting a :term:`Job` execution, the structure under which the :term:`Process` results are returned can +be adjusted using the ``Prefer`` header with the ``return`` parameter. More precisely, the ``Prefer: return=minimal`` +and ``Prefer: return=representation`` definitions can be used to control whether the resulting ``outputs`` would be +provided by + +.. note:: + The previous :term:`OGC API - Processes` standard revision instead made use of the ``response`` parameter + in the execution request body, as shown in the table :ref:`table-exec-body`. In general, the ``return=minimal`` + representation + + +.. fixme: requested ``transmissionMode`` parameter (``value``/``reference``), + +.. fixme: The other ``transmissionMode: value`` would instead return + the contents directly in the response rather than the :term:`URL`. + +.. fixme: Using ``Prefer: return=representation`` ... + +.. fixme: reword below with ``Prefer`` + +The ``response`` parameter defines how to return the results produced by the :term:`Process`. When ``response=document``, regardless of ``mode=async`` or ``mode=sync``, and regardless of requested outputs ``transmissionMode=value`` or ``transmissionMode=reference``, the results will be returned in a :term:`JSON` format containing either literal values or URL references to produced files. If ``mode=async``, @@ -764,11 +810,19 @@ depends both on the number of available :term:`Process` outputs, which ones were requested (i.e.: ``transmissionMode``). It is also possible that further content negotiation gets involved accordingly to the ``Accept`` header and available ``Content-Type`` of the outputs if multiple formats are supported by the :term:`Process`. For more details regarding those combination, the official -|ogc-exec-sync-responses|_ and |ogc-exec-async-responses|_ should be employed as reference. 
+|ogc-api-proc-exec-responses-sync|_ and |ogc-api-proc-exec-responses-async|_ should be employed as reference. + +.. note:: + The ``transmissionMode`` and ``response`` fields are part of the older methodology that precedes + the latest :term:`OGC API - Processes` standard revision using the ``Prefer`` header. -For any of the previous combinations, it is always possible to obtain :term:`Job` outputs, along with logs, exceptions -and other details using the :ref:`proc_op_result` endpoints. + Whenever possible, it is recommended to employ the ``Prefer`` header that should provide higher interoperability + with other services using the same standard. However, given that ``transmissionMode`` and ``response`` fields + can allow more flexibility and strict control regarding how data is returned is specific edge cases, in contrast + to the ``Prefer`` header approach, they remain available in `Weaver`. + See the `opengeospatial/ogcapi-processes#412 `_ + discussions for more details about each approach, their considerations, and potential side-effects. .. _proc_exec_steps: diff --git a/docs/source/references.rst b/docs/source/references.rst index 576de94cc..160665178 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -126,10 +126,14 @@ .. _ogc-api-proc-part1-spec-pdf: https://docs.ogc.org/is/18-062r2/18-062r2.pdf .. |ogc-api-proc-part1-spec-json| replace:: *OGC API - Processes* - Part 1: Core JSON schema .. _ogc-api-proc-part1-spec-json: https://raw.githubusercontent.com/opengeospatial/ogcapi-processes/master/openapi/ogcapi-processes.bundled.json -.. |ogc-exec-sync-responses| replace:: *OGC API - Processes*, Responses (sync) -.. _ogc-exec-sync-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response -.. |ogc-exec-async-responses| replace:: *OGC API - Processes*, Responses (async) -.. _ogc-exec-async-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 +.. 
|ogc-api-proc-exec-mode| replace:: *OGC API - Processes* - Execution Mode +.. _ogc-api-proc-exec-mode: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execution_mode +.. |ogc-api-proc-exec-responses-sync| replace:: *OGC API - Processes* - Execution Responses (sync) +.. _ogc-api-proc-exec-responses-sync: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response +.. |ogc-api-proc-exec-responses-async| replace:: *OGC API - Processes* - Execution Responses (async) +.. _ogc-api-proc-exec-responses-async: https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 +.. |ogc-api-proc-exec-outputs| replace:: *OGC API - Processes* - Execution Outputs +.. _ogc-api-proc-exec-outputs: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_process_outputs .. |ogc-ets-weaver-impl-ref| replace:: Weaver Product Implementation .. _ogc-ets-weaver-impl-ref: https://www.ogc.org/resources/product-details/?pid=1767 .. |ogc-api-proc-echo| replace:: *OGC API - Processes* - Part 1: Core - Echo Process diff --git a/weaver/utils.py b/weaver/utils.py index 9a74ea0bb..56ab84be6 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -959,7 +959,7 @@ def as_version_major_minor_patch(version): def as_version_major_minor_patch(version, version_format=VersionFormat.PARTS): # type: (Optional[AnyVersion], VersionFormat) -> AnyVersion """ - Generates a ``MAJOR.MINOR.PATCH`` version with padded with zeros for any missing parts. + Generates a ``MAJOR.MINOR.PATCH`` version with padded zeros for any missing parts. 
""" if isinstance(version, (str, float, int)): ver_parts = list(Version(str(version)).version) From bc3c9248d67013360f2a57ffb542f1f77c9100b8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 11 Sep 2024 15:15:45 -0400 Subject: [PATCH 02/75] [wip] update docs job exec and responses --- docs/source/configuration.rst | 2 +- docs/source/package.rst | 24 ++-- docs/source/processes.rst | 223 ++++++++++++++++++++++++++-------- 3 files changed, 184 insertions(+), 65 deletions(-) diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 9f818b2cb..baad77a2d 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -325,7 +325,7 @@ they are optional and which default value or operation is applied in each situat default. Explicit response media-types can be requested in both cases using either an explicit ``Accept`` header of the desired media-type, or their corresponding ``f`` query format. | - | This option is Only applicable when |weaver-wps-restapi-html|_ is enabled. Otherwise, :term:`JSON` responses are + | This option is only applicable when |weaver-wps-restapi-html|_ is enabled. Otherwise, :term:`JSON` responses are always employed by default. .. versionadded:: 5.7.0 diff --git a/docs/source/package.rst b/docs/source/package.rst index 8f87198ce..d8d8e6383 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -2,6 +2,19 @@ .. _package: .. _application-package: +.. shortcuts for visualization + +.. |br| raw:: html + +
+ +.. |na| replace:: *n/a* + +.. |nbsp| unicode:: 0xA0 + :trim: + +.. |<=>| unicode:: 0x21D4 + ************************* Application Package ************************* @@ -1188,17 +1201,6 @@ Below is a list of compatible elements. .. [#cwl_schemaorg] See example: `cwl-metadata`_ -.. |br| raw:: html - -
- -.. |na| replace:: *n/a* - -.. |nbsp| unicode:: 0xA0 - :trim: - -.. |<=>| unicode:: 0x21D4 - .. _app_pkg_secret_parameters: Using Secret Parameters diff --git a/docs/source/processes.rst b/docs/source/processes.rst index ffd42cc1c..bfd9f0f51 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -1,6 +1,23 @@ .. include:: references.rst .. _processes: +.. shortcuts for visualization + +.. |br| raw:: html + +
+ +.. |any| replace:: ** + +.. |none| replace:: ** + +.. |na| replace:: *n/a* + +.. |nbsp| unicode:: 0xA0 + :trim: + +.. |<=>| unicode:: 0x21D4 + ********** Processes ********** @@ -630,7 +647,7 @@ better illustrate where each of the mentioned parameters in following section ar for all applicable features. .. seealso:: - - :ref:`proc_exec_body`, :ref:`proc_exec_mode` and :ref:`proc_exec_response` sections provide details + - :ref:`proc_exec_body`, :ref:`proc_exec_mode` and :ref:`proc_exec_results` sections provide details applicable to `Weaver`, which align with :term:`OGC API - Processes`, but that can also support additional capabilities. - |ogc-api-proc-exec-outputs|_ offers general details on ``transmissionMode`` parameter of requested outputs. @@ -667,7 +684,7 @@ multiple protocols are supported. See later section :ref:`File Reference Types` The ``outputs`` section defines, for each ``id`` available from the :term:`Process` definition, how to report the produced outputs from a successful :term:`Job` execution. The method under which each output will -be returned depends on the negotiated :ref:`proc_exec_mode` and :ref:`proc_exec_response`. +be returned depends on the negotiated :ref:`proc_exec_mode` and :ref:`proc_exec_results`. When an output corresponds to a file produced by the :term:`Application Package`, and stored locally, the result will typically (unless requested otherwise), be exposed externally using the returned reference :term:`URL`. @@ -695,7 +712,7 @@ omit this output from the produced results. https://github.com/crim-ca/weaver/issues/380 Other parameters presented in the above examples, namely ``mode`` and ``response`` are further detailed in -the following :ref:`proc_exec_mode` and :ref:`proc_exec_response` sections. +the following :ref:`proc_exec_mode` and :ref:`proc_exec_results` sections. .. _proc_exec_mode: @@ -775,30 +792,102 @@ the result(s) as outputs would not be generated by the incomplete :term:`Job`. 
For any of the execution combinations, it is always possible to obtain :term:`Job` outputs, along with logs, exceptions and other details using the :ref:`proc_op_result` endpoints. -.. _proc_exec_response: +.. _proc_exec_results: -Execution Response +Execution Results ~~~~~~~~~~~~~~~~~~~~~~~~~ When requesting a :term:`Job` execution, the structure under which the :term:`Process` results are returned can be adjusted using the ``Prefer`` header with the ``return`` parameter. More precisely, the ``Prefer: return=minimal`` and ``Prefer: return=representation`` definitions can be used to control whether the resulting ``outputs`` would be -provided by +provided using link references, or directly using their raw data representation. This behavior is described by the +:term:`OGC API - Processes` (v2.0) standard revision. + +The previous :term:`OGC API - Processes` (v1.0) standard revision instead made use of a combination of the ``response`` +and ``transmissionMode`` parameters in the execution request body, as previously shown in table :ref:`table-exec-body`. + +In general, both approaches can be used interchangeably, but some combinations are not directly portable. +Whenever possible, it is recommended to employ the ``Prefer`` header that should provide higher interoperability +with the latest service implementations using the same standard. However, given that ``transmissionMode`` and ``response`` +fields can allow more flexibility and strict control regarding how data is returned in specific edge cases, in contrast +to the ``Prefer`` header approach, both approaches remain available in `Weaver`. + +.. seealso:: + See the `opengeospatial/ogcapi-processes#412 `_ + discussions for more details about each approach, their considerations, and potential side-effects. + +Following is a detailed listing of the expected response structure according to requested parameters. + +..
table:: Expected *Execution Results* according to *Requested Parameters* + :name: table-exec-resp + :align: center + + +--------------------+------------------------------+-----------+-----------------------------------------------+ + | |oap| v2.0 | |oap| v1.0 | # outputs | Results | + +--------------------+--------------+---------------+ [#n_out]_ | | + | ``Prefer: return`` | ``response`` | |out-mode| | | | + | header | |body-param| | |body-param| | | | + +====================+==============+===============+===========+===============================================+ + | |none| | |none| | |none| | 1 | [#res-auto]_ | + +--------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``document`` | |none| | 1 | [#res-auto]_ | + +--------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``document`` | ``value`` | 1 | Results as :term:`JSON`, but each | + +--------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``document`` | ``reference`` | 1 | Auto | + +--------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``raw`` | |none| | 1 | |res-raw| | + +--------------------+--------------+---------------+-----------+-----------------------------------------------+ + +.. |oap| replace:: :term:`OGC API - Processes` +.. |body-param| replace:: body parameter +.. |out-mode| replace:: ``transmissionMode`` +.. |res-auto| replace:: Auto. Resolves as if ``response=document`` and ``transmissionMode=reference`` +.. |res-raw| replace:: + Results are returned in their raw data representation, whether they represent + + +.. warning:: + It is important not to confuse expected *Results* above with *Responses*. 
+ + The actual HTTP *Response* returned from the execution endpoint will depend on the requested :ref:`proc_exec_mode`. + A :term:`Job` resolved with `synchronous` execution will return the *Results* shown in the table *directly*, whereas + an `asynchronous` execution will *always* return a :term:`JSON` :ref:`Job Status ` *Response*. + In this case, a subsequent :ref:`Results Request ` following the successful :term:`Job` completion + is needed to obtain the *Results* presented in the table. Note that a `synchronous` execution can also + make use of the :ref:`Results ` operations at a later time to obtain :term:`Job` information. .. note:: - The previous :term:`OGC API - Processes` standard revision instead made use of the ``response`` parameter - in the execution request body, as shown in the table :ref:`table-exec-body`. In general, the ``return=minimal`` - representation + Typically, clients should **NOT** use the ``Prefer`` header and ``response``/``transmissionMode`` body parameters + simultaneously, since they should be interchangeable in most situations. The table indicates both variations to + illustrate which combinations lead to the same result. If a client happens to use both combinations simultaneously, + the body parameters will take precedence over the ``Prefer`` header, except for cases where ``transmissionMode`` + would be omitted for specific ``outputs`` entries. +.. note:: + Combinations using |none| indicate that the parameter is omitted entirely from the request. -.. fixme: requested ``transmissionMode`` parameter (``value``/``reference``), +.. rubric:: Footnotes -.. fixme: The other ``transmissionMode: value`` would instead return - the contents directly in the response rather than the :term:`URL`. +.. fixme: +.. [#n_out] + Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`. -.. fixme: Using ``Prefer: return=representation`` ... +..
fixme: distinguish omitted 'outputs' (ie default "all") vs '{}' no outputs (empty contents) +.. todo:: update description, and add example to the table + The |empty|, meaning that *no outputs were explicitly requested*, definition must be distinguished from the empty :term:`JSON` ``{}`` + +.. fixme: +.. [#res-auto] + sss -.. fixme: reword below with ``Prefer`` + +.. fixme: requested ``transmissionMode`` parameter (``value``/``reference``), + +.. fixme: + reword below, above table results identical for Prefer/mode sync/async, + except that returned directly for sync, and via results endpoint in async + describe that sync can still access results afterward, as if async was used The ``response`` parameter defines how to return the results produced by the :term:`Process`. When ``response=document``, regardless of ``mode=async`` or ``mode=sync``, and regardless of requested @@ -812,17 +901,6 @@ accordingly to the ``Accept`` header and available ``Content-Type`` of the outpu by the :term:`Process`. For more details regarding those combination, the official |ogc-api-proc-exec-responses-sync|_ and |ogc-api-proc-exec-responses-async|_ should be employed as reference. -.. note:: - The ``transmissionMode`` and ``response`` fields are part of the older methodology that precedes - the latest :term:`OGC API - Processes` standard revision using the ``Prefer`` header. - - Whenever possible, it is recommended to employ the ``Prefer`` header that should provide higher interoperability - with other services using the same standard. However, given that ``transmissionMode`` and ``response`` fields - can allow more flexibility and strict control regarding how data is returned is specific edge cases, in contrast - to the ``Prefer`` header approach, they remain available in `Weaver`. - - See the `opengeospatial/ogcapi-processes#412 `_ - discussions for more details about each approach, their considerations, and potential side-effects. ..
_proc_exec_steps: @@ -845,13 +923,6 @@ indicate its completion, notably the completed percentage, time it finished exec moment, the requests for retrieving either error details or produced outputs become accessible. Examples are presented in :ref:`Result ` section. - -Process Operations -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. todo:: detail 'operations' accomplished (stage-in, exec-cwl, stage-out) - - .. _proc_workflow_ops: Workflow Step Operations @@ -1059,7 +1130,6 @@ combinations. | | *Note*: |HYBRID| assumes |EMS| role | |vault_ref| | | +-----------+------------------------------------------+---------------+-------------------------------------------+ -.. |any| replace:: ** .. |cfg| replace:: Configuration .. |os_scheme| replace:: ``opensearchfile://`` .. |http_scheme| replace:: ``http(s)://`` @@ -1070,10 +1140,6 @@ combinations. .. |EMS| replace:: :term:`EMS` .. |HYBRID| replace:: :term:`HYBRID` -.. |br| raw:: html - -
- .. rubric:: Footnotes .. [#openseach] @@ -1120,9 +1186,6 @@ combinations. .. todo:: method to indicate explicit fetch to override these? (https://github.com/crim-ca/weaver/issues/183) -.. todo:: - add tests that validate each combination of operation - .. _file_reference_names: File Reference Names @@ -1637,6 +1700,11 @@ Multiple Inputs Multiple Outputs ~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. warning:: + In this section, *Multiple Outputs* refers to multiple value or reference *items* under a single ``{outputID}``. + This is not to be confused with a :term:`Process` which has multiple and distinct ``{outputID}`` under its ``outputs`` + definition, which is supported by all :term:`CWL`, :term:`WPS` and :term:`OGC API - Processes` representations. + Although :term:`CWL` allows output ``type: array``, :term:`WPS` does not support it directly. According to :term:`WPS` specification, only a single value is allowed under each corresponding outputs ID. Adding more than one ```` or ```` definition causes undefined behavior. @@ -1666,9 +1734,9 @@ To work around this limitation, there are two potential solutions. .. versionadded:: 5.5 This method relies on encoding the resulting :term:`CWL` ``array`` output into its corresponding ``string`` - representation, and transforms the :term:`WPS` output into a ``ComplexData`` containing this :term:`JSON` string + representation, and transforms the :term:`WPS` output into a ``ComplexData`` containing this :term:`JSON` "string" instead of a ``File``. When obtaining the result from the :term:`WPS` interface, the output will therefore be - represented as a single value to respect the specification. Once this output is retrieved with + represented as a single raw string value to respect the specification. Once this output is retrieved with the :term:`OGC API - Processes` interface, it will be automatically unpacked into its original :term:`JSON` ``array`` form for the HTTP response.
From the point of view of a user interacting only with :term:`OGC API - Processes`, transition from :term:`CWL` and :term:`WPS` will be transparent. Users of the :term:`WPS` would need to perform a @@ -1676,8 +1744,8 @@ To work around this limitation, there are two potential solutions. To disambiguate from ``ComplexData`` that could be an actual single-value :term:`JSON` (i.e.: a `Process` returning any :term:`JSON`-like media-type, such as ``application/geo+json``), `Weaver` will employ the special - media-type ``application/raw+json`` to detect an embedded :term:`JSON` strategy to represent a :term:`CWL` ``array``. - Other :term:`JSON`-like media-types will remain unmodified. + media-type ``application/raw+json`` to detect this embedded :term:`JSON` strategy used to represent + the :term:`CWL` ``array``. Other :term:`JSON`-like media-types will remain unmodified. .. seealso:: - :ref:`Multiple and Optional Values` @@ -1802,9 +1870,14 @@ format is employed according to the chosen location. .. _proc_op_result: -Obtaining results, outputs, logs or errors +Obtaining job results, outputs, logs or errors --------------------------------------------------------------------- +.. _proc_op_job_outputs: + +Job Outputs +^^^^^^^^^^^^^^^^^^^^ + In the case of successful :term:`Job` execution, the *outputs* can be retrieved with |outputs-req|_ request to list each corresponding output ``id`` with the generated file reference URL. Keep in mind that the purpose of those URLs are only to fetch the results (not persistent storage), and could therefore be purged after some reasonable amount of time. @@ -1822,20 +1895,59 @@ parameters for the base :term:`WPS` output location: ] } -For the :term:`OGC` compliant endpoint, the |results-req| request can be employed instead. -In the event of a :term:`Job` executed with ``response=document``, the contents will be very similar. 
-On the other hand, a :term:`Job` submitted with ``response=raw`` can produce many alternative variations according -to :term:`OGC` requirements. For this reason, the *outputs* endpoint will always provide all data and file references -in the response body as :term:`Job`, no matter the original ``response`` format. The *outputs* endpoint can also +In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents +of a :ref:`proc_op_job_results` be very similar to the above :term:`JSON` contents, but using the ``{outputID}`` +mapping representation instead. + +On the other hand, a :term:`Job` submitted with ``response=raw`` or ``Prefer: return=representation`` can produce +many alternative variations according to :term:`OGC` requirements, the number of ``outputs`` the :term:`Process` +supports, and the respective :term:`Media-Type`, schema or literal data of each output. For this reason, +the :ref:`proc_op_job_outputs` endpoint will always provide all data and file references in the response body +as the above :term:`JSON`, no matter which :ref:`proc_exec_results` parameters where originally submitted. + +The *outputs* endpoint can also receive additional query parameters, such as ``schema``, to return contents formatted similarly to *results*, but enforcing a :term:`JSON` body as if ``response=document`` was specified during submission of the :term:`Process` execution. +.. _proc_op_job_results: + +Job Results +^^^^^^^^^^^^^^^^^^^^ + +This corresponds to the :term:`OGC API - Processes` compliant endpoint, using the |results-req| request. + +In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents +will be very similar to the following :term:`JSON` contents. + +.. fixme: +.. todo:: add job results JSON example +.. todo:: cross-reference :ref:`proc_exec_results` + + + +.. 
_proc_op_job_inputs: + +Job Inputs +^^^^^^^^^^^^^^^^^^^^ + In order to better understand the parameters that where submitted during :term:`Job` creation, the |inputs-req|_ -can be employed. This will return both the data and reference inputs that were submitted, as well as -the *requested outputs* to retrieve any relevant ``transmissionMode`` definition. +can be employed. This will return both the data and reference ``inputs`` that were submitted, as well as +the *requested* ``outputs`` to retrieve any relevant ``transmissionMode``, ``format``, etc. parameters +that where specified during submission of the :ref:`proc_exec_body`. + + +.. fixme: +.. todo:: add job inputs JSON example + -In situations where the :term:`Job` resulted into ``failed`` status, the |except-req|_ can be use to retrieve +.. _proc_op_job_error: +.. _proc_op_job_exceptions: + +Job Exceptions +^^^^^^^^^^^^^^^^^^^^ + +In situations where the :term:`Job` resulted into ``failed`` status, the |except-req|_ can be used to retrieve the potential cause of failure, by capturing any raised exception. Below is an example of such exception details. .. code-block:: json @@ -1847,6 +1959,11 @@ the potential cause of failure, by capturing any raised exception. Below is an e The returned exception are often better understood when compared against, or in conjunction with, the logs that provide details over each step of the operation. +.. _proc_op_job_logs: + +Job Logs +^^^^^^^^^^^^^^^^^^^^ + Any :term:`Job` executed by `Weaver` will provide minimal information log, such as operation setup, the moment when it started execution and latest status. The extent of other log entries will more often than not depend on the verbosity of the underlying process being executed. 
When executing an :ref:`Application Package`, `Weaver` tries as From 06fb1cb27196613ba996d96a355a5fdac84fc22d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 11 Sep 2024 20:12:00 -0400 Subject: [PATCH 03/75] [Wip] more docs updates and examples for job results --- docs/examples/job_outputs_listing.json | 13 ++++ docs/examples/job_outputs_mapping.json | 9 +++ docs/examples/job_results_document.json | 7 ++ docs/examples/job_results_raw_multi.http | 15 ++++ docs/examples/job_results_raw_single.http | 5 ++ docs/source/processes.rst | 86 ++++++++++++++++------- 6 files changed, 110 insertions(+), 25 deletions(-) create mode 100644 docs/examples/job_outputs_listing.json create mode 100644 docs/examples/job_outputs_mapping.json create mode 100644 docs/examples/job_results_document.json create mode 100644 docs/examples/job_results_raw_multi.http create mode 100644 docs/examples/job_results_raw_single.http diff --git a/docs/examples/job_outputs_listing.json b/docs/examples/job_outputs_listing.json new file mode 100644 index 000000000..e25123e9e --- /dev/null +++ b/docs/examples/job_outputs_listing.json @@ -0,0 +1,13 @@ +{ + "outputs": [ + { + "id": "output-file", + "href": "https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc", + "type": "application/x-netcdf" + }, + { + "id": "output-data", + "value": 3.1416 + } + ] +} diff --git a/docs/examples/job_outputs_mapping.json b/docs/examples/job_outputs_mapping.json new file mode 100644 index 000000000..880703829 --- /dev/null +++ b/docs/examples/job_outputs_mapping.json @@ -0,0 +1,9 @@ +{ + "outputs": { + "output-file": { + "href": "https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc", + "type": "application/x-netcdf" + }, + "output-data": 3.1416 + } +} diff --git a/docs/examples/job_results_document.json b/docs/examples/job_results_document.json new file mode 100644 index 000000000..be8c6bcf5 --- /dev/null +++ 
b/docs/examples/job_results_document.json @@ -0,0 +1,7 @@ +{ + "output-file": { + "href": "https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc", + "type": "application/x-netcdf" + }, + "output-data": 3.1416 +} diff --git a/docs/examples/job_results_raw_multi.http b/docs/examples/job_results_raw_multi.http new file mode 100644 index 000000000..50538d3c4 --- /dev/null +++ b/docs/examples/job_results_raw_multi.http @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Host: weaver.example.com +Content-Type: multipart/form-data; boundary=43003e2f205a180ace9cd34d98f911ff + +--43003e2f205a180ace9cd34d98f911ff +Content-ID: output-file +Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc +Content-Type: application/x-netcdf + +--43003e2f205a180ace9cd34d98f911ff +Content-ID: output-data +Content-Type: text/plain + +3.1416 +--43003e2f205a180ace9cd34d98f911ff-- diff --git a/docs/examples/job_results_raw_single.http b/docs/examples/job_results_raw_single.http new file mode 100644 index 000000000..74a8ea20d --- /dev/null +++ b/docs/examples/job_results_raw_single.http @@ -0,0 +1,5 @@ +HTTP/1.1 200 OK +Host: weaver.example.com +Content-Type: application/x-netcdf + + diff --git a/docs/source/processes.rst b/docs/source/processes.rst index bfd9f0f51..3dbf5ceed 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -1884,31 +1884,35 @@ only to fetch the results (not persistent storage), and could therefore be purge The format should be similar to the following example, with minor variations according to :ref:`Configuration` parameters for the base :term:`WPS` output location: -.. code-block:: json - - { - "outputs": [ - { - "id": "output", - "href": "{WEAVER_URL}/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc" - } - ] - } +.. 
literalinclude:: ../examples/job_outputs_listing.json + :language: json In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents -of a :ref:`proc_op_job_results` be very similar to the above :term:`JSON` contents, but using the ``{outputID}`` -mapping representation instead. +of a :ref:`proc_op_job_results` will be very similar to the above :term:`JSON` contents, but using the ``{outputID}`` +mapping representation directly returned, instead of listing them as "output items" under ``outputs``. On the other hand, a :term:`Job` submitted with ``response=raw`` or ``Prefer: return=representation`` can produce -many alternative variations according to :term:`OGC` requirements, the number of ``outputs`` the :term:`Process` -supports, and the respective :term:`Media-Type`, schema or literal data of each output. For this reason, +many alternative content variations according to :term:`OGC` requirements, the number of requested ``outputs``, +and the respective :term:`Media-Type`, schema or literal data of each output. For this reason, the :ref:`proc_op_job_outputs` endpoint will always provide all data and file references in the response body -as the above :term:`JSON`, no matter which :ref:`proc_exec_results` parameters where originally submitted. - -The *outputs* endpoint can also -receive additional query parameters, such as ``schema``, to return contents formatted similarly to *results*, but -enforcing a :term:`JSON` body as if ``response=document`` was specified during submission of the :term:`Process` -execution. +as represented by the above :term:`JSON`, no matter which :ref:`proc_exec_results` parameters were originally +submitted. In other words, the contents of the "``output_netcdf.nc``" file will never be directly returned as +response when using the :ref:`proc_op_job_outputs` endpoint, and will always use the ``document``/``minimal`` links. 
+Furthermore, because this response nests the items under ``outputs``, other information can be returned, +such as relevant ``links`` +with references to :ref:`proc_op_job_inputs`, :ref:`proc_op_job_logs`, :ref:`Job Status `, +or the source :ref:`Process Description ` that produced the returned :term:`Job` outputs. + +The :ref:`proc_op_job_outputs` endpoint can also receive additional query parameters, +such as ``schema=OGC+strict``, which +allows it to return contents formatted slightly differently, to imitate the :term:`JSON` mapping representation +(rather than the array) used by the :ref:`proc_exec_results` endpoint as if ``response=document`` was specified +during submission of the :term:`Process` execution. However, this :term:`JSON` mapping will still employ a +nested ``outputs`` property, as presented below, in order to allow additional ``links`` information. + +.. literalinclude:: ../examples/job_outputs_mapping.json + :language: json .. _proc_op_job_results: @@ -1916,15 +1920,47 @@ Job Results ^^^^^^^^^^^^^^^^^^^^ This corresponds to the :term:`OGC API - Processes` compliant endpoint, using the |results-req| request. +Contrary to :ref:`proc_op_job_outputs`, where the :term:`JSON` representation is always enforced, this endpoint +will respond according to the submitted :term:`Job` parameters, as described in :ref:`proc_exec_results`. -In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents -will be very similar to the following :term:`JSON` contents. +In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal`` with multiple outputs, +the contents will typically be a :term:`JSON` mapping representation, where each *requested* ``{outputID}`` can be +found either as ``value`` or ``reference``, depending on how they were requested or resolved according +to :ref:`proc_exec_results`. An example of such results is presented below. -.. fixme: -.. 
todo:: add job results JSON example -.. todo:: cross-reference :ref:`proc_exec_results` +.. literalinclude:: ../examples/job_results_document.json + :language: json + +.. note:: + The ``{outputID}`` are returned at the root of the contents using this representation, + contrary to the :ref:`proc_op_job_outputs` endpoint that nests them under ``outputs``. + +When a :term:`Job` is executed with ``response=raw``, or when the *requested* ``outputs`` [#n_out]_ consisted only of +a single ``{outputID}``, the returned data will directly +be the contents of the produced file, or literal value, as applicable according to the ``schema`` definition of the +corresponding output in the :ref:`Process Description `. For example, a single-output result +could be returned in the following response. + +.. literalinclude:: ../examples/job_results_raw_single.http + :caption: Example of a single output returned directly (``raw``) with ``representation`` preference + :language: http + +When the number of *requested* ``outputs`` [#n_out]_ is more than one, the response will either be +multipart contents or similar to the above ``document`` :term:`JSON` structure, according to the +negotiated ``Content-Type``. An example of a multipart representation is shown below. + +.. literalinclude:: ../examples/job_results_raw_multi.http + :caption: Example of multiple outputs returned directly (``raw``) with ``minimal`` preference + :language: http +Note that, in the above response, the ``Content-Location`` is used for the ``output-file``, whereas the data +is directly returned for the ``output-data``. This is based on `Weaver` auto-resolving ``transmissionMode: reference`` +for a :ref:`File Reference ` result, while using ``transmissionMode: value`` by default for literal +data types. This is equivalent to requesting the :term:`Job` execution with ``Prefer: return=minimal``. 
+If ``transmissionMode: value`` under ``output-file`` in the *requested* ``outputs`` [#n_out]_ +or ``Prefer: return=representation`` were used, the data of the file would be directly included in the +response instead of using ``Content-Location``. .. _proc_op_job_inputs: From a31d3443f3289c2bfb1d71095129218519be5ea6 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 12 Sep 2024 14:55:10 -0400 Subject: [PATCH 04/75] [wip] job requests outputs --- docs/source/processes.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 3dbf5ceed..eba412b4f 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -832,7 +832,7 @@ Following is a detailed listing of the expected response structure according to +--------------------+--------------+---------------+-----------+-----------------------------------------------+ | |none| | ``document`` | |none| | 1 | [#res-auto]_ | +--------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``document`` | ``value`` | 1 | Results as :term:`JSON`, but each | + | |none| | ``document`` | ``value`` | 1 | Results as :term:`JSON`, but each | +--------------------+--------------+---------------+-----------+-----------------------------------------------+ | |none| | ``document`` | ``reference`` | 1 | Auto | +--------------------+--------------+---------------+-----------+-----------------------------------------------+ @@ -869,9 +869,11 @@ Following is a detailed listing of the expected response structure according to .. rubric:: Footnotes -.. fixme: .. [#n_out] Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`. + Note that omitting ``outputs`` (i.e.: indicated by |none| in the table) is equivalent to requesting *all* outputs. 
+ To request "*no outputs at all*" (if it makes sense for :term:`Process` to do so), the empty mapping ``outputs: {}`` + should be submitted explicitly. See table :ref:`table-exec-body` for an example requesting specific outputs. .. fixme: distinguish omitted 'outputs' (ie default "all") vs '{}' no outputs (empty contents) .. todo:: update description, and add example to the table @@ -1967,7 +1969,7 @@ response instead of using ``Content-Location``. Job Inputs ^^^^^^^^^^^^^^^^^^^^ -In order to better understand the parameters that where submitted during :term:`Job` creation, the |inputs-req|_ +In order to better understand the parameters that were submitted during :term:`Job` creation, the |inputs-req|_ can be employed. This will return both the data and reference ``inputs`` that were submitted, as well as the *requested* ``outputs`` to retrieve any relevant ``transmissionMode``, ``format``, etc. parameters that where specified during submission of the :ref:`proc_exec_body`. From e42c0a7732bf992082609c84671371e51ef20feb Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 13 Sep 2024 01:24:20 -0400 Subject: [PATCH 05/75] [wip] more docs updates for job exec results --- docs/_static/custom.css | 29 +- ...json => job_results_document_minimal.json} | 0 docs/examples/job_results_raw_multi.http | 6 +- ....http => job_results_raw_single_data.http} | 0 docs/examples/job_results_raw_single_ref.http | 5 + docs/source/appendix.rst | 4 + docs/source/conf.py | 4 +- docs/source/configuration.rst | 11 +- docs/source/package.rst | 24 +- docs/source/processes.rst | 259 ++++++++++++------ docs/source/references.rst | 4 - 11 files changed, 231 insertions(+), 115 deletions(-) rename docs/examples/{job_results_document.json => job_results_document_minimal.json} (100%) rename docs/examples/{job_results_raw_single.http => job_results_raw_single_data.http} (100%) create mode 100644 docs/examples/job_results_raw_single_ref.http diff --git a/docs/_static/custom.css 
b/docs/_static/custom.css index e497b5c13..f4e109c2c 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -3,10 +3,10 @@ max-width: none; } -/* force code-table to align their cells to top (align code line numbers between columns) +/* force table-code to align their cells to top (align code line numbers between columns) note: class attribute must be applied to match this specific type of table */ -.code-table tbody tr td { +.table-code tbody tr td { vertical-align: top !important; /* max-width: min-content; */ @@ -21,11 +21,34 @@ div[class^="highlight"] { max-width: min-content; } -.code-table tbody tr td div { +.table-code tbody tr td div { max-width: none !important; } */ +.table-exec-results thead, +.table-exec-results tbody { + vertical-align: top !important; +} + +.table-exec-results thead { + background-color: #cccccc; +} + +.table-exec-results thead tr:nth-child(1) > th:nth-child(1), +.table-exec-results thead tr:nth-child(1) > th:nth-child(2) { + border-bottom-color: #777777 !important; +} + +.table-exec-results tr:nth-child(1) > th:nth-child(2), +.table-exec-results tr:nth-child(1) > th:nth-child(3), +.table-exec-results tr:nth-child(2) > th:nth-child(2), +.table-exec-results td:nth-child(2), +.table-exec-results td:nth-child(4) { + border-left-color: #777777 !important; + border-left-width: medium !important; +} + /* override table width restrictions avoids cells trying to fit all their text single line with a slider instead, text will wrap according to specified :widths: specifications diff --git a/docs/examples/job_results_document.json b/docs/examples/job_results_document_minimal.json similarity index 100% rename from docs/examples/job_results_document.json rename to docs/examples/job_results_document_minimal.json diff --git a/docs/examples/job_results_raw_multi.http b/docs/examples/job_results_raw_multi.http index 50538d3c4..34a807649 100644 --- a/docs/examples/job_results_raw_multi.http +++ b/docs/examples/job_results_raw_multi.http 
@@ -1,15 +1,15 @@ HTTP/1.1 200 OK Host: weaver.example.com -Content-Type: multipart/form-data; boundary=43003e2f205a180ace9cd34d98f911ff +Content-Type: multipart/related; boundary=43003e2f205a180ace9cd34d98f911ff; type=application/x-netcdf --43003e2f205a180ace9cd34d98f911ff +Content-Type: application/x-netcdf Content-ID: output-file Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc -Content-Type: application/x-netcdf --43003e2f205a180ace9cd34d98f911ff -Content-ID: output-data Content-Type: text/plain +Content-ID: output-data 3.1416 --43003e2f205a180ace9cd34d98f911ff-- diff --git a/docs/examples/job_results_raw_single.http b/docs/examples/job_results_raw_single_data.http similarity index 100% rename from docs/examples/job_results_raw_single.http rename to docs/examples/job_results_raw_single_data.http diff --git a/docs/examples/job_results_raw_single_ref.http b/docs/examples/job_results_raw_single_ref.http new file mode 100644 index 000000000..f6ae1da40 --- /dev/null +++ b/docs/examples/job_results_raw_single_ref.http @@ -0,0 +1,5 @@ +HTTP/1.1 200 OK +Host: weaver.example.com +Content-Type: application/x-netcdf +Content-Length: 0 +Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc diff --git a/docs/source/appendix.rst b/docs/source/appendix.rst index 32995c0a8..de18c19da 100644 --- a/docs/source/appendix.rst +++ b/docs/source/appendix.rst @@ -205,6 +205,10 @@ Glossary More recent `Media-Type` naming is employed for the general use of ``Content-Type`` data representation in multiple situations and contexts. + .. 
seealso:: + - |iana-link|_ + - |edam-link|_ + OAS OpenAPI OpenAPI Specification (`OAS`) defines a standard, programming language-agnostic interface description for diff --git a/docs/source/conf.py b/docs/source/conf.py index 36ecc793f..bbb7cea76 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -245,7 +245,7 @@ def doc_redirect_include(file_path): # The name of an image file (relative to this directory) to place at the top # of the sidebar. -# html_logo = None +html_logo = "../_static/crim.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 @@ -313,7 +313,7 @@ def doc_redirect_include(file_path): # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -# html_search_language = 'en' +html_search_language = "en" # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index baad77a2d..cd3c97944 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -404,7 +404,6 @@ they are optional and which default value or operation is applied in each situat .. versionadded:: 4.15 .. versionchanged:: 4.34 -.. |weaver-execute-sync-max-wait| replace:: ``weaver.execute_sync_max_wait`` .. _weaver-execute-sync-max-wait: - | ``weaver.execute_sync_max_wait = `` [:class:`int`, seconds] @@ -696,7 +695,7 @@ and all corresponding functionalities, including `API` endpoints, will be disabl - | ``weaver.quotation_docker_image = `` [:class:`str`] | - | Specifies the :term:`Docker` image used as |quote-estimator|_ to evaluate a :term:`Quote` + | Specifies the :term:`Docker` image used for :ref:`quote_estimation` to evaluate a :term:`Quote` for the eventual :term:`Process` execution. | | Required if ``weaver.quotation`` is enabled. 
@@ -709,7 +708,7 @@ and all corresponding functionalities, including `API` endpoints, will be disabl - | ``weaver.quotation_docker_username = `` [:class:`str`] | - | Username to employ for authentication when retrieving the :term:`Docker` image used as |quote-estimator|_. + | Username to employ for authentication when retrieving the :term:`Docker` image used as :ref:`quote_estimation`. | | Only required if the :term:`Docker` image is not accessible publicly or already provided through some other means when requested by the :term:`Docker` daemon. @@ -723,7 +722,7 @@ and all corresponding functionalities, including `API` endpoints, will be disabl - | ``weaver.quotation_docker_password = `` [:class:`str`] | - | Password to employ for authentication when retrieving the :term:`Docker` image used as |quote-estimator|_. + | Password to employ for authentication when retrieving the :term:`Docker` image used as :ref:`quote_estimation`. | | Only required if the :term:`Docker` image is not accessible publicly or already provided through some other means when requested by the :term:`Docker` daemon. @@ -740,7 +739,7 @@ and all corresponding functionalities, including `API` endpoints, will be disabl | | Currency code in `ISO-4217 `_ format used by default. | - | It is up to the specified |quote-estimator|_ algorithm defined by ``weaver.quotation_docker_image`` and + | It is up to the specified :ref:`quote_estimation` algorithm defined by ``weaver.quotation_docker_image`` and employed by the various :term:`Process` to ensure that the returned :ref:`quote_estimation` cost makes sense according to the specified default currency. | @@ -804,7 +803,7 @@ and all corresponding functionalities, including `API` endpoints, will be disabl - | ``weaver.quotation_currency_token = `` [:class:`str`] | - | Password to employ for authentication when retrieving the :term:`Docker` image used as |quote-estimator|_. 
+ | Password to employ for authentication when retrieving the :term:`Docker` image used as :ref:`quote_estimation`. | | Only required if the :term:`Docker` image is not accessible publicly or already provided through some other means when requested by the :term:`Docker` daemon. diff --git a/docs/source/package.rst b/docs/source/package.rst index 32563b26b..6aa34db0a 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -431,8 +431,8 @@ CWL Workflow ------------------------ `Weaver` also supports :term:`CWL` ``class: Workflow``. When an :term:`Application Package` is defined this way, the -|process-deploy-op|_ will attempt to resolve each ``step`` as another process. The reference to the :term:`CWL` -definition can be placed in any location supported as for the case of atomic processes +:ref:`Process Deployment ` operation will attempt to resolve each ``step`` as another :term:`Process`. +The reference to the :term:`CWL` definition can be placed in any location supported as for the case of atomic processes (see details about :ref:`supported package locations `). The following :term:`CWL` definition demonstrates an example ``Workflow`` process that would resolve each ``step`` with @@ -570,7 +570,7 @@ same process definition after deployment. For simplification purpose, below exam Other fields are discussed afterward in specific sections. .. table:: - :class: code-table + :class: table-code :align: center +-----------------------------------+----------------------------------------+----------------------------------+ @@ -698,7 +698,7 @@ specific types will be presented in :ref:`cwl-type` and :ref:`cwl-dir` sections. | | | | handled as nested ``Files`` to stage. | +----------------------+-------------------------+------------------------+--------------------------------------------+ -.. rubric:: Footnotes +.. rubric:: Details .. [#note1] Resolution method according to critical fields defined in :ref:`cwl-type`. 
@@ -830,7 +830,7 @@ multiple ``File`` locations from ``s3://`` buckets to stage for :ref:`Process Ex The following ``Directory`` listing formats are supported. .. table:: - :class: code-table + :class: table-code :align: center :widths: 70,30 @@ -878,7 +878,7 @@ will be mapped against corresponding *namespaced* ``format`` of :term:`CWL`. Following is an example where input definitions are equivalent in both :term:`CWL` and :term:`WPS` contexts. .. table:: - :class: code-table + :class: table-code :align: center :widths: 50,50 @@ -977,7 +977,7 @@ inputs to a specific set of values. In :term:`CWL`, the same can be achieved usi the following two variants are equivalent and completely interchangeable. .. table:: - :class: code-table + :class: table-code :align: center :widths: 50,50 @@ -1044,7 +1044,7 @@ a given input. Some parts of the following definitions are purposely omitted to of *multiple* and *optional* information. .. table:: - :class: code-table + :class: table-code :align: center :widths: 50,50 @@ -1098,7 +1098,7 @@ Obviously, corresponding definitions can become more or less complicated with mu parameters presented later in this section. Some definitions are also not completely portable between contexts. .. table:: - :class: code-table + :class: table-code :align: center :widths: 33,34,33 @@ -1209,7 +1209,7 @@ Following is a sample representation of equivalent variants :term:`JSON` definit automatically expended using the ``oneOf`` structure with other missing components if applicable. .. table:: - :class: code-table + :class: table-code :align: center :widths: 50,50 @@ -1285,7 +1285,7 @@ following structures. If the ``contentMediaType`` happens to be :term:`JSON`, th schema can be added as well, as presented in :ref:`oas_json_types` section. .. table:: - :class: code-table + :class: table-code :align: center :widths: 50,50 @@ -1351,7 +1351,7 @@ Below is a list of compatible elements. 
| ``version`` | ``s:version``/``s:softwareVersion`` [#cwl_schemaorg]_ | +-----------------------------------------+----------------------------------------------------------+ -.. rubric:: Footnotes +.. rubric:: Details .. [#cwl_schemaorg] When using these properties, it is expected that the :term:`CWL` :term:`Application Package` resolves diff --git a/docs/source/processes.rst b/docs/source/processes.rst index eba412b4f..49c319936 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -509,22 +509,27 @@ will have to be available and respect the expected update level to be accepted a The applicable revision level depends on the contents being modified using submitted request body fields according to the following table. When a combination of the below items occur, the higher update level is required. -+-------------+-----------+------------------------------------+------------------------------------------------------+ -| HTTP Method | Level | Change | Examples | -+=============+===========+====================================+======================================================+ -| ``PATCH`` | ``PATCH`` | Modifications to metadata | - :term:`Process` ``description``, ``title`` strings | -| | | not impacting the :term:`Process` | - :term:`Process` ``keywords``, ``metadata`` lists | -| | | execution or definition. | - inputs/outputs ``description``, ``title`` strings | -| | | | - inputs/outputs ``keywords``, ``metadata`` lists | -+-------------+-----------+------------------------------------+------------------------------------------------------+ -| ``PATCH`` | ``MINOR`` | Modification that impacts *how* | - :term:`Process` ``jobControlOptions`` (async/sync) | -| | | the :term:`Process` could be | - :term:`Process` ``outputTransmission`` (ref/value) | -| | | executed, but not its definition. 
| - :term:`Process` ``visibility`` | -+-------------+-----------+------------------------------------+------------------------------------------------------+ -| ``PUT`` | ``MAJOR`` | Modification that impacts *what* | - Any :term:`Application Package` modification | -| | | the :term:`Process` executes. | - Any inputs/outputs change (formats, occurs, type) | -| | | | - Any inputs/outputs addition or removal | -+-------------+-----------+------------------------------------+------------------------------------------------------+ +.. table:: Process Semantic Version Level Resolution according to Applied Changes + :name: table-process-version + :align: center + + +-------------+-----------+---------------------------------+------------------------------------------------------+ + | HTTP Method | Level | Change | Examples | + +=============+===========+=================================+======================================================+ + | ``PATCH`` | ``PATCH`` | Modifications to metadata | - :term:`Process` ``description``, ``title`` strings | + | | | not impacting the | - :term:`Process` ``keywords``, ``metadata`` lists | + | | | :term:`Process` execution | - inputs/outputs ``description``, ``title`` strings | + | | | or definition. | - inputs/outputs ``keywords``, ``metadata`` lists | + +-------------+-----------+---------------------------------+------------------------------------------------------+ + | ``PATCH`` | ``MINOR`` | Modification that impacts *how* | - :term:`Process` ``jobControlOptions`` (async/sync) | + | | | the :term:`Process` could be | - :term:`Process` ``outputTransmission`` (ref/value) | + | | | executed, but not its | - :term:`Process` ``visibility`` | + | | | definition. 
| | + +-------------+-----------+---------------------------------+------------------------------------------------------+ + | ``PUT`` | ``MAJOR`` | Modification that impacts | - Any :term:`Application Package` modification | + | | | *what* the :term:`Process` | - Any inputs/outputs change (formats, occurs, type) | + | | | executes. | - Any inputs/outputs addition or removal | + +-------------+-----------+---------------------------------+------------------------------------------------------+ .. note:: For all applicable fields of updating a :term:`Process`, refer to the schema of |update-req|_. @@ -604,7 +609,7 @@ better illustrate where each of the mentioned parameters in following section ar .. table:: Example Job Execution Request Body :name: table-exec-body - :class: code-table + :class: table-code :align: center +-----------------------------------------------+-----------------------------------------------+ @@ -723,7 +728,8 @@ In order to select how to execute a :term:`Process`, either `synchronously` or ` should be specified. If omitted, `Weaver` defaults to `asynchronous` execution. To execute `asynchronously` explicitly, ``Prefer: respond-async`` should be used. Otherwise, the `synchronous` execution can be requested with ``Prefer: wait=X`` where ``X`` is the duration in seconds to wait for a response. If no worker becomes available -within that time, or if this value is greater than the |weaver-execute-sync-max-wait|_ setting, the :term:`Job` will +within that time, or if this value is greater than +the ``weaver.execute_sync_max_wait`` setting (see :ref:`detail `), the :term:`Job` will resume `asynchronously` and the response will be returned. Furthermore, `synchronous` and `asynchronous` execution of a :term:`Process` can only be requested for corresponding ``jobControlOptions`` it reports as supported in its :ref:`Process Description `. 
It is important to provide the ``jobControlOptions`` parameter with @@ -818,73 +824,132 @@ to the ``Prefer`` header approach, both approaches remain available in `Weaver`. Following is a detailed listing of the expected response structure according to requested parameters. +.. fixme: add missing combinations + .. table:: Expected *Execution Results* according to *Requested Parameters* - :name: table-exec-resp + :name: table-exec-results + :class: table-exec-results :align: center - +--------------------+------------------------------+-----------+-----------------------------------------------+ - | |oap| v2.0 | |oap| v1.0 | # outputs | Results | - +--------------------+--------------+---------------+ [#n_out]_ | | - | ``Prefer: return`` | ``response`` | |out-mode| | | | - | header | |body-param| | |body-param| | | | - +====================+==============+===============+===========+===============================================+ - | |none| | |none| | |none| | 1 | [#res-auto]_ | - +--------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``document`` | |none| | 1 | [#res-auto]_ | - +--------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``document`` | ``value`` | 1 | Results as :term:`JSON`, but each | - +--------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``document`` | ``reference`` | 1 | Auto | - +--------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``raw`` | |none| | 1 | |res-raw| | - +--------------------+--------------+---------------+-----------+-----------------------------------------------+ + +---------------------+------------------------------+-----------+-----------------------------------------------+ + | |oap| v2.0 | |oap| v1.0 | # | Results | + 
+---------------------+--------------+---------------+ requested | | + | ``Prefer: return`` | ``response`` | |out-mode| | outputs | | + | header | |body-param| | |body-param| | [#outN]_ | | + +=====================+==============+===============+===========+===============================================+ + | |any| | |any| | |na| | 0 | |res-empty| [#resNoContent]_ | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | |none| | |none| | 1 | |res-accept| | + | | | | | |res-json-warn|_ | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``raw`` | |none| | 1 | - |res-accept| | + | | | | | - |res-auto| [#resValRef]_ | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``raw`` | ``value`` | 1 | - |res-accept| | + | | | | | - |res-data|_ | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``raw`` | ``reference`` | 1 | - |res-accept| | + | | | | | - |res-ref|_ | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | |none| | |none| | >1 | |res-accept| [#resCTypeMulti]_ | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``document`` | |none| | 1 | :ref:`Results ` | + | | | | | [#resValRef]_ | + +---------------------+ | | | | + | ``minimal`` | | | | | + | | | | | | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |na| | ``document`` | ``value`` | 1 | :ref:`Results ` | + | | | | | with data included inline. 
| + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + | |none| | ``document`` | ``reference`` | 1 | :ref:`Results ` | + | | | | | with linked file reference. | + +---------------------+ | | | | + | ``minimal`` | | | | | + | | | | | | + +---------------------+--------------+---------------+-----------+-----------------------------------------------+ .. |oap| replace:: :term:`OGC API - Processes` .. |body-param| replace:: body parameter .. |out-mode| replace:: ``transmissionMode`` -.. |res-auto| replace:: Auto. Resolves as if ``response=document`` and ``transmissionMode=reference`` -.. |res-raw| replace:: - Results are returned in their raw data representation, whether they represent +.. |res-empty| replace:: *empty* +.. |res-accept| replace:: *as negotiated by* ``Accept`` *header or* ``format`` *parameter* +.. |res-auto| replace:: *with auto resolution of data/link representation* +.. |res-data| replace:: Results for a Single Output with Data +.. _res-data: processes.html#job-results-raw-single-data -.. warning:: +.. |res-ref| replace:: Results for a Single Output with Link +.. _res-ref: processes.html#job-results-raw-single-ref + +.. important:: + Typically, clients will not use the ``Prefer`` header and ``response``/``transmissionMode`` body parameters + simultaneously (although permitted), since they should be interchangeable in most situations. + The table indicates both variations to illustrate which combinations lead to the **same result**. + If a client happens to use both combinations simultaneously, the body parameters will take precedence + over the ``Prefer`` header. + +.. important:: It is important not to confuse expected *Results* above with *Responses*. The actual HTTP *Response* returned from the execution endpoint will depend on the requested :ref:`proc_exec_mode`.
- A :term:`Job` resolved with `synchronous` execution will return the *Results* shown in the table *directly*, whereas - an `asynchronous` execution will *always* return a :term:`JSON` :ref:`Job Status ` *Response*. - In this case, a subsequent :ref:`Results Request ` following the successful :term:`Job` completion - is needed to obtain the *Results* presented in the table. Note that a `synchronous` execution can also - make use of the :ref:`Results ` operations at a later time to obtain :term:`Job` information. + A :term:`Job` successfully resolved with `synchronous` execution will return the *Results* shown in the table + directly with an *HTTP 200 OK* status, whereas an `asynchronous` execution will always return a + :ref:`Job Status ` *Response* with *HTTP 201 Created* or *HTTP 202 Accepted* status. -.. note:: - Typically, clients should **NOT** use ``Prefer`` header and ``response``/``transmissionMode`` body parameters - simultaneously, since they should be interchangeable in most situations. The table indicates both variations to - illustrate which combinations lead to the same result. If a client happens to use both combination simultaneously, - the body parameters will take precedence over the ``Prefer`` header, except for cases where ``transmissionMode`` - would be omitted for specific ``outputs`` entries. + In the case of a successfully completed `asynchronous` execution, a + subsequent :ref:`Results Request ` using the :term:`Job` ``Location`` + is needed to obtain the *Results* presented in the above table. Note that a `synchronous` execution can also + make use of the :ref:`Results Request ` operations at a later time to obtain + additional :term:`Job` information such as logs or metadata. .. note:: - Combinations using |none| indicate that the parameter is omitted entirely from the request. + Combinations using |none| indicate that the parameter is **omitted entirely** from the request.
+ When the value is provided but "*does not matter*" (i.e.: leading to the same outcome regardless), + the |any| notation is used instead. + The |na| notation indicates *not applicable* cases, due to a technical or logical impossibility. -.. rubric:: Footnotes +.. |res-json-warn| replace:: :sup:`(warning: ambiguity)` +.. _res-json-warn: -.. [#n_out] - Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`. - Note that omitting ``outputs`` (i.e.: indicated by |none| in the table) is equivalent to requesting *all* outputs. - To request "*no outputs at all*" (if it makes sense for :term:`Process` to do so), the empty mapping ``outputs: {}`` - should be submitted explicitly. See table :ref:`table-exec-body` for an example requesting specific outputs. +.. warning:: + When negotiating a single output as :term:`JSON`, there is a potential ambiguity between + the :ref:`Results ` representation and a single file's data, such as in the + case of a :term:`GeoJSON` structure, both of which are encoded in :term:`JSON`. -.. fixme: distinguish omitted 'outputs' (ie default "all") vs '{}' no outputs (empty contents) -.. todo:: update description, and add example to the table - The |empty|, meaning that *no outputs were explicitly requested*, definition must be distinguished from the empty :term:`JSON`` ``{}`` + Similar ambiguities could also occur, depending on supported formats, such as representing + :term:`Job` results in :term:`XML`, or retrieving a file's data encoded as GML :term:`XML`. -.. fixme: -.. [#res-auto] - sss + To avoid ambiguity, it is recommended that either ``response: document`` or ``response: raw`` + be explicitly set for such cases to ensure the result matches the desired outcome. +.. rubric:: Details -.. fixme: requested ``transmissionMode`` parameter (``value``/``reference``), +.. [#outN] + Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`.
+ Note that omitting ``outputs`` (i.e.: indicated by |out-mode| with |none| in the table) is equivalent to + requesting *all* outputs offered by the :term:`Process`. To request "*no outputs at all*" + (if it makes sense for the :term:`Process` to do so), + the empty mapping ``outputs: {}`` should be submitted explicitly [#resNoContent]_. + See table :ref:`table-exec-body` for an example requesting specific outputs. + +.. [#resNoContent] + The *HTTP 204 No Content* response will be returned regardless of the ``response`` parameter, the ``Prefer`` + header, or the requested ``Accept`` header. Since "*no outputs*" is requested with an explicit ``outputs: {}``, + the ``transmissionMode`` does not apply by definition. + +.. [#resCTypeMulti] + The data of the multiple outputs are simultaneously returned, but their encoding depends on the requested ``Accept`` + header. By default, the :ref:`Results ` structure encoded as :term:`JSON` is employed. + However, the :ref:`Results for Multiple Outputs ` example using ``multipart/related`` + contents could also be obtained if requested. Other representations, such as packaging the results under + a single ZIP archive, could also be returned. + +.. [#resValRef] + Although the general "*response structure*" is established by other parameters in this case, whether respective + outputs are returned by ``value`` or by ``reference`` depends on the ``Prefer`` header and ``transmissionMode`` + combinations, as well as each output's literal/complex data type representation. See :ref:`proc_op_job_results` + for more details. .. fixme: reword below, above table results identical for Prefer/mode sync/async, @@ -990,7 +1055,7 @@ File Reference Types Most inputs can be categorized into two of the most commonly employed types, namely ``LiteralData`` and ``ComplexData``. The former represents basic values such as integers or strings, while the other represents a ``File`` or ``Directory`` -reference.
Files in `Weaver` (and :term:`WPS` in general) can be specified with any ``formats`` as |media-types|_. +reference. Files in `Weaver` (and :term:`WPS` in general) can be specified with any ``formats`` as :term:`Media-Types`. .. seealso:: - :ref:`cwl-wps-mapping` @@ -1142,7 +1207,7 @@ combinations. .. |EMS| replace:: :term:`EMS` .. |HYBRID| replace:: :term:`HYBRID` -.. rubric:: Footnotes +.. rubric:: Details .. [#openseach] References defined by |os_scheme| will trigger an :term:`OpenSearch` query using the provided URL as @@ -1878,7 +1943,7 @@ Obtaining job results, outputs, logs or errors .. _proc_op_job_outputs: Job Outputs -^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~ In the case of successful :term:`Job` execution, the *outputs* can be retrieved with |outputs-req|_ request to list each corresponding output ``id`` with the generated file reference URL. Keep in mind that the purpose of those URLs are @@ -1919,7 +1984,7 @@ nested ``outputs`` property, as presented below, in order to allow additional ` .. _proc_op_job_results: Job Results -^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~ This corresponds to the :term:`OGC API - Processes` compliant endpoint, using the |results-req| request. Contrary to :ref:`proc_op_job_outputs`, where the :term:`JSON` representation is always enforced, this endpoint @@ -1930,8 +1995,10 @@ the contents will typically be a :term:`JSON` mapping representation, where each found either as ``value`` or ``reference``, accordingly to how they were requested or resolved according to :ref:`proc_exec_results`. An example of such results is presented below. -.. literalinclude:: ../examples/job_results_document.json +.. literalinclude:: ../examples/job_results_document_minimal.json :language: json + :caption: Results for a ``document`` response with ``minimal`` representation + :name: job-results-document-minimal .. note:: The ``{outputID}`` are returned at the root of the contents using this representation, @@ -1940,34 +2007,55 @@ to :ref:`proc_exec_results`. 
An example of such results is presented below. When a :term:`Job` is executed with ``response=raw``, or when the *requested* ``outputs``[#n_out]_ consisted only of a single ``{outputID}``, the returned data will directly be the contents of the produced file, or literal value, as applicable according to the ``schema`` definition of the -corresponding output in the :ref:`Process Description `. For example, a single-output results -could be returned in the following response. +corresponding output in the :ref:`Process Description `. + +The following result will be obtained if any of the following conditions are encountered: +1. The result is a :ref:`File Reference ` and the ``Prefer: return=representation`` header was used +2. The result is a :ref:`File Reference ` and the ``transmissionMode: value`` parameter was used +3. The result is a literal data type, whether or not ``Prefer``/``transmissionMode`` were specified with above values. -.. literalinclude:: ../examples/job_results_raw_single.http - :caption: Example of a single output returned directly (``raw``) with ``representation`` preference +.. literalinclude:: ../examples/job_results_raw_single_data.http :language: http + :caption: Results for a single output returned directly by value + :name: job-results-raw-single-data + +The following result will be obtained if any of the following conditions are encountered: +1. The result is a :ref:`File Reference ` and the ``Prefer: return=minimal`` header was used +2. The result is a :ref:`File Reference ` and the ``transmissionMode: reference`` parameter was used +3. The result is a literal data type, and any above ``Prefer``/``transmissionMode`` value is *explicitly* requested. + +.. 
literalinclude:: ../examples/job_results_raw_single_ref.http + :language: http + :caption: Results for a single output returned directly by reference + :name: job-results-raw-single-ref When the number of *requested* ``outputs``[#n_out]_ is more than one, the response will either be -multipart contents or similar to the above ``document`` :term:`JSON` structure, accordingly to the +multipart contents or similar to the first ``document`` :term:`JSON` structure, according to the negotiated ``Content-Type``. An example of a multipart representation is shown below. +The resolution of the nested outputs within each boundary, either by value or reference, is performed +for each respective output according to the same rule combinations specified above for a single output. .. literalinclude:: ../examples/job_results_raw_multi.http - :caption: Example of a multiple outputs returned directly (``raw``) with ``minimal`` preference - :language: http + :language: mime + :caption: Results for multiple outputs returned directly (``raw``) with ``minimal`` preference + :name: job-results-raw-multi Note that, in the above response, the ``Content-Location`` is used for the ``output-file``, whereas the data is directly returned for the ``output-data``. This is based on `Weaver` auto-resolving ``transmissionMode: reference`` for a :ref:`File Reference ` result, while using ``transmissionMode: value`` by default for literal -data types. This is equivalent to requesting the :term:`Job` execution with ``Prefer: return=minimal``. +data types. This is equivalent to requesting the :term:`Job` execution with ``Prefer: return=minimal``, since the +most succinct *response contents* for a file is obtained by using a link reference, whereas literal data types can be +provided directly.
-If the ``transmissionMode: value`` under ``output-file`` in the *requested* ``outputs``[#n_out]_ -or ``Prefer: return=representation`` were used, the data of the file would be directly included in the -response instead of using ``Content-Location``. +If the ``transmissionMode: value`` under ``output-file`` in the *requested* ``outputs`` [#outN]_ +or ``Prefer: return=representation`` were used, the data of the file would be directly included inline within the +response instead of using ``Content-Location``, similarly to the :ref:`job-results-raw-single-data` example, +but nested within its respective ``Content-ID: output-file`` multipart bounds. .. _proc_op_job_inputs: Job Inputs -^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~ In order to better understand the parameters that were submitted during :term:`Job` creation, the |inputs-req|_ can be employed. This will return both the data and reference ``inputs`` that were submitted, as well as @@ -1983,7 +2071,7 @@ that where specified during submission of the :ref:`proc_exec_body`. .. _proc_op_job_exceptions: Job Exceptions -^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~ In situations where the :term:`Job` resulted into ``failed`` status, the |except-req|_ can be used to retrieve the potential cause of failure, by capturing any raised exception. Below is an example of such exception details. @@ -2000,7 +2088,7 @@ provide details over each step of the operation. .. _proc_op_job_logs: Job Logs -^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~ Any :term:`Job` executed by `Weaver` will provide minimal information log, such as operation setup, the moment when it started execution and latest status. 
The extent of other log entries will more often than not depend on the @@ -2139,7 +2227,8 @@ ADES dispatching using Data Sources -------------------------------------- When using either the :term:`EMS` or :term:`HYBRID` [#notedatasource]_ configurations, :term:`Process` -executions are dispatched to the relevant :term:`ADES` or another :term:`HYBRID` server supporting |process-deploy-op|_ +executions are dispatched to the relevant :term:`ADES` or another :term:`HYBRID` server supporting +:ref:`Process Deployment ` when inputs are matched against one of the configured :term:`Data Source`. Minimal implementations of :term:`OGC API - Processes` can also work as external :term:`Provider` where to dispatch executions, but in the case of *core* implementations, the :term:`Process` should be already available since it cannot be deployed. @@ -2158,7 +2247,7 @@ pulling the data locally when :term:`Data Source` become substantial. Furthermor providers to define custom or private data retrieval mechanisms, where data cannot be exposed or offered externally, but are still available for use when requested. -.. rubric:: Footnotes +.. rubric:: Details .. [#notedatasource] Configuration :term:`HYBRID` applies here in cases where `Weaver` acts as an :term:`EMS` for remote dispatch diff --git a/docs/source/references.rst b/docs/source/references.rst index 1ff757ecd..809cc5c27 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -154,7 +154,6 @@ .. |ogc-proc-ext-quotation| replace:: *OGC API - Processes* - Quotation extension .. _ogc-proc-ext-quotation: https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/quotation .. |quote-estimator| replace:: *Quote Estimator* -.. _quote-estimator: :ref:`quotation_quote_estimator` .. |quote-estimation-config| replace:: *Quote Estimator Configuration* .. _quote-estimation-config: ../../../weaver/schemas/quotation/quote-estimator.yaml .. 
|quote-estimation-result| replace:: *Quote Estimation Result* @@ -177,9 +176,6 @@ .. _weaver-issues: https://github.com/crim-ca/weaver/issues .. |submit-issue| replace:: submit a new issue .. _submit-issue: https://github.com/crim-ca/weaver/issues/new/choose -.. inter-reference to 'process->Deploy' section, but cannot be a link since not included -.. _process-deploy-op: :ref:`proc_op_deploy` -.. |process-deploy-op| replace:: Process deployment operation .. STAC .. |stac-spec| replace:: STAC Specification From f825cb3540ee2778a72aac5e99db73056bf98f85 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 11:44:55 -0400 Subject: [PATCH 06/75] add missing doc results combinations --- docs/_static/custom.css | 5 ++ docs/source/processes.rst | 157 +++++++++++++++++++++++++------------- 2 files changed, 109 insertions(+), 53 deletions(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index f4e109c2c..39f14d7f0 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -49,6 +49,11 @@ div[class^="highlight"] { border-left-width: medium !important; } +/* avoid unnecessary spacing causing table to be massively longer than needed */ +.table-exec-results ul { + margin-bottom: 0 !important; +} + /* override table width restrictions avoids cells trying to fit all their text single line with a slider instead, text will wrap according to specified :widths: specifications diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 49c319936..f7bff5d5d 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -824,57 +824,81 @@ to the ``Prefer`` header approach, both approaches remain available in `Weaver`. Following is a detailed listing of the expected response structure according to requested parameters. -.. fixme: add missing combinations - .. 
table:: Expected *Execution Results* according to *Requested Parameters* :name: table-exec-results :class: table-exec-results :align: center - +---------------------+------------------------------+-----------+-----------------------------------------------+ - | |oap| v2.0 | |oap| v1.0 | # | Results | - +---------------------+--------------+---------------+ requested | | - | ``Prefer: return`` | ``response`` | |out-mode| | outputs | | - | header | |body-param| | |body-param| | [#outN]_ | | - +=====================+==============+===============+===========+===============================================+ - | |any| | |any| | |na| | 0 | |res-empty| [#resNoContent]_ | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | |none| | |none| | 1 | |res-accept| | - | | | | | |res-json-warn|_ | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``raw`` | |none| | 1 | - |res-accept| | - | | | | | - |res-auto| [#resValRef]_ | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``raw`` | ``value`` | 1 | - |res-accept| | - | | | | | - |res-data|_ | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``raw`` | ``reference`` | 1 | - |res-accept| | - | | | | | - |res-ref|_ | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | |none| | |none| | >1 | |res-accept| [#resCTypeMulti]_ | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``document`` | |none| | 1 | :ref:`Results ` | - | | | | | [#resValRef]_ | - +---------------------+ | | | | - | ``minimal`` | | | | | - | | | | | | - 
+---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |na| | ``document`` | ``value`` | 1 | :ref:`Results ` | - | | | | | with data included inline. | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ - | |none| | ``document`` | ``reference`` | 1 | :ref:`Results ` | - | | | | | with linked file reference. | - +---------------------+ | | | | - | ``minimal`` | | | | | - | | | | | | - +---------------------+--------------+---------------+-----------+-----------------------------------------------+ + +---------------------+------------------------------+-----------+-------------------------------------------------+ + | |oap| v2.0 | |oap| v1.0 | |nReqOut| | Results |res-important|_ | + +---------------------+--------------+---------------+ [#outN]_ | | + | ``Prefer: return`` | ``response`` | |out-mode| | | | + | header | |body-param| | |body-param| | | | + +=====================+==============+===============+===========+=================================================+ + | |any| | |any| | |na| | 0 | |res-empty| [#resNoContent]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |none| | |none| | |none| | 1 | |res-accept| |res-fmt-warn|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``representation`` | ``raw`` | |none| | 1 | - |res-accept| | + | | | | | - |res-auto| [#resValRef]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``representation`` | ``raw`` | ``value`` | 1 | - |res-accept| | + | | | | (literal) | - |res-data|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``representation`` | ``raw`` | ``reference`` | 1 | - |res-accept| 
| + | | | | (complex) | - |res-ref|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |na| | ``raw`` | ``value`` | 1 | - |res-accept| | + | [#resPreferReturn]_ | | | (complex) | - |res-data|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |na| | ``raw`` | ``reference`` | 1 | - |res-accept| | + | [#resPreferReturn]_ | | | (literal) | - |res-ref|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |none| | |none| | |none| | >1 | - :ref:`Results ` | + | | | | | content by default [#resCTypeMulti]_ | + | | | | | - otherwise, |res-accept| |res-fmt-warn|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``representation`` | ``raw`` | |none| | >1 | - :ref:`Multipart ` | + | | | | | content [#resCTypeMulti]_ | + | | | | | - |res-auto| [#resValRef]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |na| | ``raw`` | ``value`` | >1 | - :ref:`Multipart ` | + | [#resPreferReturn]_ | | *or* | | content [#resCTypeMulti]_ | + | | | ``reference`` | | - using embedded content part data/link | + | | | | | as requested by |out-mode| [#resValRefForce]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |none| | ``document`` | |none| | |any| | - :ref:`Results ` | + | | | | | content | + | | | | | - |res-auto| [#resValRef]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``minimal`` | ``document`` | |none| | |any| | - :ref:`Results ` | + | | | | | content | + | | | | | - |res-auto| [#resValRef]_ | + 
+---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``minimal`` | ``document`` | ``value`` | |any| | - :ref:`Results ` | + | [#resPreferReturn]_ | | | (literal) | content | + | | | | | - using data included inline | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``minimal`` | ``document`` | ``reference`` | |any| | - :ref:`Results ` | + | [#resPreferReturn]_ | | | (complex) | content | + | | | | | - using file link reference | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |na| | ``document`` | ``value`` | |any| | - :ref:`Results ` | + | [#resPreferReturn]_ | | | (complex) | content | + | | | | | - using data included inline [#resValRefForce]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |na| | ``document`` | ``reference`` | |any| | - :ref:`Results ` | + | [#resPreferReturn]_ | | | (literal) | content | + | | | | | - using file link reference [#resValRefForce]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ .. |oap| replace:: :term:`OGC API - Processes` .. |body-param| replace:: body parameter .. |out-mode| replace:: ``transmissionMode`` +.. |nReqOut| replace:: Amount and type of |br| *requested outputs* .. |res-empty| replace:: *empty* .. |res-accept| replace:: *as negotiated by* ``Accept`` *header or* ``format`` *parameter* -.. |res-auto| replace:: *with auto resolution of data/link representation* +.. |res-auto| replace:: *using automatic resolution of data/link representation* .. |res-data| replace:: Results for a Single Output with Data .. 
_res-data: processes.html#job-results-raw-single-data @@ -885,10 +909,13 @@ Following is a detailed listing of the expected response structure according to .. important:: Typically, clients will not use ``Prefer`` header and ``response``/``transmissionMode`` body parameters simultaneously (although permitted), since they should be interchangeable in most situations. - The table indicates both variations to illustrate which combinations lead to the **same result**. + The table indicates both |oap| v1.0/v2.0 variations to illustrate which combinations lead to the **same result**. If a client happens to use both combination simultaneously, the body parameters will take precedence over the ``Prefer`` header. +.. |res-important| replace:: :sup:`(see: important note)` +.. _res-important: + .. important:: It is important not to confuse expected *Results* above with *Responses*. @@ -896,7 +923,6 @@ Following is a detailed listing of the expected response structure according to A :term:`Job` successfully resolved with `synchronous` execution will return the *Results* shown in the table directly with a *HTTP 200 OK* status, whereas an `asynchronous` execution will always return a :ref:`Job Status ` *Response* with *HTTP 201 Created* or *HTTP 202 Accepted* status. - In the case of a successfully completed `asynchronous` execution, a subsequent :ref:`Results Request ` using the :term:`Job` ``Location`` is needed to obtain the *Results* presented in the above table. Note that a `synchronous` execution can also @@ -909,16 +935,15 @@ Following is a detailed listing of the expected response structure according to the |any| notation is used instead. The |na| notation indicates *not applicable* cases, due to a technical or logical impossibility. -.. |res-json-warn| replace:: :sup:`(warning: ambiguity)` -.. _res-json-warn: +.. |res-fmt-warn| replace:: :sup:`(warning: ambiguity)` +.. _res-fmt-warn: .. 
warning:: When negotiating a single output as :term:`JSON`, there is a potential ambiguity between :ref:`Results ` representation and a single file's data, such as in the case of a :term:`GeoJSON` structure, both of which are encoded in :term:`JSON`. - - Similar ambiguities could also occur, depending on supported formats, such as representing - :term:`Job` results in :term:`XML`, or retrieving a file's data encoded as GML :term:`XML`. + Similar ambiguities could also occur for other :term:`Media-Types`, depending on supported formats, + such as representing :term:`Job` results in :term:`XML`, or retrieving a file's data encoded as GML :term:`XML`. To avoid ambiguity, it is recommended that the ``response: document`` or ``response: raw`` is explicitly set for such cases to ensure the result matches the desired outcome. @@ -926,7 +951,9 @@ Following is a detailed listing of the expected response structure according to .. rubric:: Details .. [#outN] - Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`. + Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`, and the data type of + those outputs if this distinction impacts the results. + Note that omitting ``outputs`` (i.e.: indicated by |out-mode| with |none| in the table) is equivalent to requesting *all* outputs offered by the :term:`Process`. To request "*no outputs at all*" (if it makes sense for :term:`Process` to do so), @@ -942,14 +969,38 @@ Following is a detailed listing of the expected response structure according to The data of the multiple outputs are simultaneously returned, but their encoding depend on the requested ``Accept`` header. By default, the :ref:`Results ` structure encoded as :term:`JSON` is employed. However, the :ref:`Results for Multiple Outputs ` example using ``multipart/related`` - contents could also be obtained if requested. 
Other representations, such as packaging the results under - a single ZIP archive could also be returned. + contents could also be obtained if requested, or as established by using other parameter combinations. + Other content representations, such as packaging the results under a single ZIP archive, could also be returned + if requested. However, alternate representations might not allow some ``transmissionMode`` combinations according + to their logical representation (e.g.: a ZIP archive could refuse ``transmissionMode: reference`` to only allow + files to be directly included in the ZIP, rather than link references to them). .. [#resValRef] Although the general "*response structure*" is established by other parameters in this case, whether respective outputs are returned by ``value`` or by ``reference`` depend on the ``Prefer`` header and ``transmissionMode`` - combinations, as well as each output's literal/complex data type representation. See :ref:`proc_op_job_results` - for more details. + combinations, as well as each output's literal/complex data type representation. + Typically, complex file-like outputs would be automatically represented by link references, and literal data + outputs would be represented with their values inline. See :ref:`proc_op_job_results` for more details. + + To request only a specific output, while using the automatic resolution rather than + specifying ``value`` or ``reference`` explicitly, the ``transmissionMode`` should be + omitted from the :ref:`proc_exec_body` (i.e.: ``outputs: { "": {} }``). + +.. [#resValRefForce] + The ``value`` or ``reference`` format is enforced according to the requested ``transmissionMode`` of each + respective output. + In the case of file-like complex data, ``value`` would force the file contents to be embedded inline in the + document, whereas ``reference`` would use a link (its usual default behavior). 
Similarly, a literal data type + would have its output placed inline in the document using ``value`` (its usual default behavior), whereas a + link would be enforced if ``reference`` was requested. + +.. [#resPreferReturn] + Using only the |oap| v2.0 ``Prefer: return`` header parameter, it is not always possible to *enforce* every + result combination as when using |oap| v1.0 parameters. More specifically, it is not possible to replicate + cases where a requested output specifies a ``transmissionMode`` using an *opposite* representation from its + "*default minimum*" representation of literal or complex data. However, ``Prefer: return`` header is equivalent + for cases where *every requested output* uses the default matching the specified or resolved ``transmissionMode`` + (i.e.: ``value`` for literal data, ``reference`` for complex data). .. fixme: reword below, above table results identical for Prefer/mode sync/async, @@ -1555,7 +1606,7 @@ Collection Inputs ~~~~~~~~~~~~~~~~~~~~~~~~~~ The |ogc-api-proc-part3-collection-input|_ is defined by the |ogc-api-proc-part3|_ extension. This allows to submit a -:term:`Process Execution ` using the following :term:`JSON` structure when the targeted :term:`Process` +:ref:`Process Execution ` using the following :term:`JSON` structure when the targeted :term:`Process` can make use of the resulting data sources retrieved from the referred :term:`Collection` and processing conditions. 
The ``collection`` keyword is employed to identify this type of input, in contrast to literal data and complex file inputs respectively using ``value`` and ``href``, as presented in the :ref:`Process Execution ` From 4d65b5c4ea24e4e31f11e4a8b63ce574d4cbb0b4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 11:49:01 -0400 Subject: [PATCH 07/75] doc note reword --- docs/source/processes.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index f7bff5d5d..d645762bf 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -925,9 +925,11 @@ Following is a detailed listing of the expected response structure according to :ref:`Job Status ` *Response* with *HTTP 201 Created* or *HTTP 202 Accepted* status. In the case of a successfully completed `asynchronous` execution, a subsequent :ref:`Results Request ` using the :term:`Job` ``Location`` - is needed to obtain the *Results* presented in the above table. Note that a `synchronous` execution can also - make use of the :ref:`Results Request ` operations at a later time to obtain - additional :term:`Job` information such as logs or metadata. + is needed to obtain the *Results* presented in the above table. + + Note that a `synchronous` execution can also + make use of the :ref:`Results Request ` operation to obtain the outputs again at a later time, + or to request alternate representations, or retrieve additional :term:`Job` information such as logs or metadata. .. note:: Combinations using |none| indicate that the parameter is **omitted entirely** from the request. 
From b5028c6b29751326db2f223effba7dc69cdd7142 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 12:45:11 -0400 Subject: [PATCH 08/75] fix docs formatting/invalid references --- docs/source/processes.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index d645762bf..d74c73c3a 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -843,10 +843,10 @@ Following is a detailed listing of the expected response structure according to | | | | | - |res-auto| [#resValRef]_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``representation`` | ``raw`` | ``value`` | 1 | - |res-accept| | - | | | | (literal) | - |res-data|_ | + | [#resPreferReturn]_ | | | (literal) | - |res-data|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``representation`` | ``raw`` | ``reference`` | 1 | - |res-accept| | - | | | | (complex) | - |res-ref|_ | + | [#resPreferReturn]_ | | | (complex) | - |res-ref|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``value`` | 1 | - |res-accept| | | [#resPreferReturn]_ | | | (complex) | - |res-data|_ | @@ -928,8 +928,8 @@ Following is a detailed listing of the expected response structure according to is needed to obtain the *Results* presented in the above table. Note that a `synchronous` execution can also - make use of the :ref:`Results Request ` operation to obtain the outputs again at a later time, - or to request alternate representations, or retrieve additional :term:`Job` information such as logs or metadata. 
+ make use of the :ref:`Results Request ` operation to obtain the outputs again at a later time, to + request alternate output representations, or retrieve additional :term:`Job` information such as logs and metadata. .. note:: Combinations using |none| indicate that the parameter is **omitted entirely** from the request. @@ -2063,6 +2063,7 @@ be the contents of the produced file, or literal value, as applicable according corresponding output in the :ref:`Process Description `. The following result will be obtained if any of the following conditions are encountered: + 1. The result is a :ref:`File Reference ` and the ``Prefer: return=representation`` header was used 2. The result is a :ref:`File Reference ` and the ``transmissionMode: value`` parameter was used 3. The result is a literal data type, whether or not ``Prefer``/``transmissionMode`` were specified with above values. @@ -2073,6 +2074,7 @@ The following result will be obtained if any of the following conditions are enc :name: job-results-raw-single-data The following result will be obtained if any of the following conditions are encountered: + 1. The result is a :ref:`File Reference ` and the ``Prefer: return=minimal`` header was used 2. The result is a :ref:`File Reference ` and the ``transmissionMode: reference`` parameter was used 3. The result is a literal data type, and any above ``Prefer``/``transmissionMode`` value is *explicitly* requested. @@ -2082,7 +2084,7 @@ The following result will be obtained if any of the following conditions are enc :caption: Results for a single output returned directly by reference :name: job-results-raw-single-ref -When the number of *requested* ``outputs``[#n_out]_ is more than one, the response will either be +When the number of *requested* ``outputs`` [#outN]_ is more than one, the response will either be multipart contents or similar to the first ``document`` :term:`JSON` structure, accordingly to the negotiated ``Content-Type``. 
An example of a multipart representation is shown below. The resolution of the nested outputs within each boundary, either by value or reference, will resolve From 5c3cf1bdb9d88f313aac27f1dcf44b9668359a37 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 12:50:34 -0400 Subject: [PATCH 09/75] uniform casing of doc header --- docs/source/processes.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index d74c73c3a..83142f261 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -387,14 +387,14 @@ following request (`DescribeProviderProcess`_): .. _proc_operations: -Managing processes included in Weaver ADES/EMS +Managing Processes included in Weaver ADES/EMS ================================================== Following steps represent the typical steps applied to deploy a process, execute it and retrieve the results. .. _proc_op_deploy: -Register a new process (Deploy) +Register a New Process (Deploy) ----------------------------------------- Deployment of a new process is accomplished through the ``POST {WEAVER_URL}/processes`` |deploy-req|_ request. @@ -437,7 +437,7 @@ For most traditional use cases, properties are mapped between the two interfaces .. _proc_op_getcap: .. _proc_op_describe: -Access registered processes (GetCapabilities, DescribeProcess) +Access Registered Processes (GetCapabilities, DescribeProcess) ------------------------------------------------------------------------ Available processes can all be listed using |getcap-req|_ request. This request will return all locally registered @@ -473,7 +473,7 @@ the |getcap-req|_ request. .. _proc_op_undeploy: .. 
_proc_op_update: -Modify an existing process (Update, Replace, Undeploy) +Modify an Existing Process (Update, Replace, Undeploy) ----------------------------------------------------------------------------- Since `Weaver` supports |ogc-api-proc-part2|_, it is able to remove a previously registered :term:`Process` using @@ -592,7 +592,7 @@ new :ref:`Deploy ` request. .. _proc_op_execute: -Execution of a process (Execute) +Execution of a Process (Execute) --------------------------------------------------------------------- :term:`Process` execution (i.e.: submitting a :term:`Job`) is accomplished using the |exec-req|_ request. @@ -1952,7 +1952,7 @@ of the polling-based method on the :ref:`Job Status ` endpoint o .. _proc_op_status: .. _proc_op_monitor: -Monitoring of a process execution (GetStatus) +Monitoring of a Process Execution (GetStatus) --------------------------------------------------------------------- Monitoring the execution of a :term:`Job` consists of polling the status ``Location`` provided from the :ref:`Execute` @@ -1990,7 +1990,7 @@ format is employed according to the chosen location. .. _proc_op_result: -Obtaining job results, outputs, logs or errors +Obtaining Job Results, Outputs, Logs or Errors --------------------------------------------------------------------- .. 
_proc_op_job_outputs: From 8eeb1de8eb71fa25b0380a738adc6f65f2de1c3e Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 14:44:47 -0400 Subject: [PATCH 10/75] more docs updates + examples --- docs/examples/job_inputs.json | 77 ++++++++++++++++ docs/source/processes.rst | 160 ++++++++++++++++++++-------------- 2 files changed, 171 insertions(+), 66 deletions(-) create mode 100644 docs/examples/job_inputs.json diff --git a/docs/examples/job_inputs.json b/docs/examples/job_inputs.json new file mode 100644 index 000000000..b34c7965c --- /dev/null +++ b/docs/examples/job_inputs.json @@ -0,0 +1,77 @@ +{ + "inputs": { + "calc": "4.26 * ((C / A) ** 3.94)", + "band_a": { + "href": "https://example.com/wpsoutputs/weaver/users/23/6f197568-38f5-42f4-851c-0c56d446094c/product/T29SPC_20190601T110621_B02_10m.jp2", + "type": "image/jp2" + }, + "band_c": { + "href": "https://example.com/wpsoutputs/weaver/users/23/977799a0-bf63-4406-a419-6d686c9a8fc9/product/T29SPC_20190601T110621_B04_10m.jp2", + "type": "image/jp2" + }, + "name": "output" + }, + "outputs": { + "result": { + "transmissionMode": "reference" + } + }, + "links": [ + { + "title": "Job status.", + "hreflang": "en-CA", + "href": "https://example.com/weaver/processes/calculate-band/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96", + "type": "application/json", + "rel": "status" + }, + { + "title": "Job status generic endpoint.", + "hreflang": "en-CA", + "href": "https://example.com/weaver/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96", + "type": "application/json", + "rel": "alternate" + }, + { + "title": "New job submission endpoint for the corresponding process.", + "hreflang": "en-CA", + "href": "https://example.com/weaver/processes/calculate-band/jobs/execution", + "type": "application/json", + "rel": "http://www.opengis.net/def/rel/ogc/1.0/execute" + }, + { + "title": "Submitted job inputs for process execution.", + "hreflang": "en-CA", + "href": 
"https://example.com/weaver/processes/calculate-band/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96/inputs", + "type": "application/json", + "rel": "inputs" + }, + { + "title": "Job outputs of successful process execution (extended outputs with metadata).", + "hreflang": "en-CA", + "href": "https://example.com/weaver/processes/calculate-band/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96/outputs", + "type": "application/json", + "rel": "outputs" + }, + { + "title": "Job results of successful process execution (direct output values mapping).", + "hreflang": "en-CA", + "href": "https://example.com/weaver/processes/calculate-band/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96/results", + "type": "application/json", + "rel": "http://www.opengis.net/def/rel/ogc/1.0/results" + }, + { + "title": "Job statistics collected following process execution.", + "hreflang": "en-CA", + "href": "https://example.com/weaver/processes/calculate-band/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96/statistics", + "type": "application/json", + "rel": "statistics" + }, + { + "title": "List of collected job logs during process execution.", + "hreflang": "en-CA", + "href": "https://example.com/weaver/processes/calculate-band/jobs/034151ec-a87e-41ed-8ab4-8afb22b48e96/logs", + "type": "application/json", + "rel": "logs" + } + ] +} diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 83142f261..6874776b8 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -602,10 +602,10 @@ Execution of a Process (Execute) :term:`OGC API - Processes` compliant endpoint. This section will first describe the basics of this request format (:ref:`proc_exec_body`), and after go into -further details for specific use cases and parametrization of various input/output combinations. - +further details for specific use cases and parametrization of various input/output combinations +(:ref:`proc_exec_mode`, :ref:`proc_exec_results`, etc.). 
Below are some examples of :term:`JSON` body that can be sent to the :term:`Job` execution endpoint to -better illustrate where each of the mentioned parameters in following section are expected. +better illustrate where each of the mentioned parameters in following sections are expected. .. table:: Example Job Execution Request Body :name: table-exec-body @@ -646,12 +646,11 @@ better illustrate where each of the mentioned parameters in following section ar data (values provided without the nested ``value`` field). The listing representation is the older format employed during previous :term:`OGC` testbed developments. -.. note:: - Other parameters can be added to the request to provide further functionalities. Above fields are the minimum - requirements to request a :term:`Job`. Please refer to the |exec-api|_ definition, as well as following sections, - for all applicable features. - .. seealso:: + Many additional parameters can be used to request further functionalities. The above fields only present the + most common definitions employed to request a :term:`Job`. Please refer to the |exec-api|_ definition, as well + as following sections, for all applicable features. + - :ref:`proc_exec_body`, :ref:`proc_exec_mode` and :ref:`proc_exec_results` sections provide details applicable to `Weaver`, which align with :term:`OGC API - Processes`, but that can also support additional capabilities. @@ -1625,7 +1624,7 @@ section. Also, different combinations of parameters will be supported depending on which remote :term:`API` gets interrogated to resolve the :term:`Collection` contents. The |ogc-api-proc-part3|_ is still under development, and interactions with the various access points of |ogc-api-standards|_ remains to - be evaluated in detail to further explore interoperability concerns between all :term:`API`implementations. + be evaluated in detail to further explore interoperability concerns between all :term:`API` implementations. 
Refer to :ref:`proc_col_inputs_examples` for potential combinations and additional samples. To determine which *items* should be retrieved from the :term:`Collection`, whether they are obtained by @@ -1889,9 +1888,9 @@ avoid conflicts. Therefore, outputs will be available with the following locatio .. code-block:: - {WPS_OUTPUT_URL}/{JOB_UUID}.xml # status location - {WPS_OUTPUT_URL}/{JOB_UUID}.log # execution logs - {WPS_OUTPUT_URL}/{JOB_UUID}/{output.ext} # results of the job if successful + {WPS_OUTPUT_URL}/{JOB_UUID}.xml # status location + {WPS_OUTPUT_URL}/{JOB_UUID}.log # execution logs + {WPS_OUTPUT_URL}/{JOB_UUID}/{outputID}/{output.ext} # results of the job if successful .. note:: Value ``WPS_OUTPUT_URL`` in above example is resolved accordingly with ``weaver.wps_output_url``, @@ -1904,11 +1903,11 @@ For example, providing ``X-WPS-Output-Context: project/test-1`` will result in o .. code-block:: - {WPS_OUTPUT_URL}/project/test-1/{JOB_UUID}/{output.ext} + {WPS_OUTPUT_URL}/project/test-1/{JOB_UUID}/{outputID}/{output.ext} .. note:: - Values provided by ``X-WPS-Output-Context`` can only contain alphanumeric, hyphens, underscores and path - separators that will result in a valid directory and URL locations. The path is assumed relative by design to be + Values provided by ``X-WPS-Output-Context`` can only contain alphanumeric, hyphens, underscores and path separators + that will result in a valid directory and :term:`URL` locations. The path is assumed relative by design to be resolved under the :term:`WPS` output directory, and will therefore reject any ``.`` or ``..`` path references. The path also **CANNOT** start by ``/``. In such cases, an HTTP error will be immediately raised indicating the symbols that where rejected when detected within ``X-WPS-Output-Context`` header. @@ -1931,16 +1930,16 @@ Notification Subscribers ~~~~~~~~~~~~~~~~~~~~~~~~~~ When submitting a :term:`Job` for execution, it is possible to provide the ``notification_email`` field. 
-Doing so will tell `Weaver` to send an email to the specified address with successful or failure details +Doing so will tell `Weaver` to send an email to the specified address with *successful* or *failure* details upon :term:`Job` completion. The format of the email is configurable from `weaver.ini.example`_ file with -email-specific settings (see: :ref:`Configuration`). +email-specific settings (see: :ref:`Email Configuration `). Alternatively to ``notification_email``, the ``subscribers`` field of the :term:`API` can be employed during :term:`Job` submission. Using this field will take precedence over ``notification_email`` for corresponding email and status combinations. The :term:`Job` ``subscribers`` allow more fined-grained control over which emails will be sent for the various combinations of :term:`Job` status milestones. -Furthermore, ``subscribers`` allow specifying URLs where HTTP(S) requests will be sent with +Furthermore, ``subscribers`` allow specifying URLs where HTTP(S) callback requests (i.e.: webhooks) will be sent with the :ref:`Job Status ` or :ref:`Job Results ` contents directly in :term:`JSON` format. This allows users and/or servers to directly receive the necessary details using a push-notification mechanism instead of the polling-based method on the :ref:`Job Status ` endpoint otherwise required to obtain updated @@ -1955,10 +1954,10 @@ of the polling-based method on the :ref:`Job Status ` endpoint o Monitoring of a Process Execution (GetStatus) --------------------------------------------------------------------- -Monitoring the execution of a :term:`Job` consists of polling the status ``Location`` provided from the :ref:`Execute` -operation and verifying the indicated ``status`` for the expected result. The ``status`` can correspond to any of the -value defined by :data:`weaver.status.JOB_STATUS_VALUES` accordingly to the internal state of the workers processing -their execution. 
+Monitoring the execution of a :term:`Job` consists of polling the status ``Location`` provided from the +:ref:`Execute ` operation and verifying the indicated ``status`` for the expected result. +The ``status`` can correspond to any of the values defined by :data:`weaver.status.JOB_STATUS_VALUES` +according to the internal state of the workers processing their execution. When targeting a :term:`Job` submitted to a `Weaver` instance, monitoring is usually accomplished through the :term:`OGC API - Processes` endpoint using |status-req|_, which will return a :term:`JSON` body. @@ -1979,7 +1978,7 @@ format is employed according to the chosen location. - Contents - Location * - :term:`OGC API - Processes` - - JSON + - :term:`JSON` - ``{WEAVER_URL}/jobs/{JobUUID}`` * - :term:`WPS` - :term:`XML` @@ -1990,7 +1989,7 @@ format is employed according to the chosen location. .. _proc_op_result: -Obtaining Job Results, Outputs, Logs or Errors +Obtaining Job Outputs, Results, Logs or Errors --------------------------------------------------------------------- .. _proc_op_job_outputs: @@ -1998,6 +1997,10 @@ Obtaining Job Results, Outputs, Logs or Errors Job Outputs ~~~~~~~~~~~ +.. note:: + This endpoint is a `Weaver`-specific implementation provided for convenience. + For the :term:`OGC API - Processes` compliant endpoint, refer to :ref:`proc_op_job_results`. + In the case of successful :term:`Job` execution, the *outputs* can be retrieved with |outputs-req|_ request to list each corresponding output ``id`` with the generated file reference URL. Keep in mind that the purpose of those URLs are only to fetch the results (not persistent storage), and could therefore be purged after some reasonable amount of time. @@ -2006,33 +2009,41 @@ parameters for the base :term:`WPS` output location: .. 
literalinclude:: ../examples/job_outputs_listing.json :language: json + :caption: :term:`Job` Outputs Response with Listing Representation + :name: job-outputs-listing -In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents -of a :ref:`proc_op_job_results` will be very similar to the above :term:`JSON` contents, but using the ``{outputID}`` -mapping representation directly returned, instead of listing them as "output items" under ``outputs``. - -On the other hand, a :term:`Job` submitted with ``response=raw`` or ``Prefer: return=representation`` can produce -many alternative content variations according to :term:`OGC` requirements, the number of requested ``outputs``, -and the respective :term:`Media-Type`, schema or literal data of each output. For this reason, -the :ref:`proc_op_job_outputs` endpoint will always provide all data and file references in the response body -as represented by the above :term:`JSON`, no matter which :ref:`proc_exec_results` parameters where originally -submitted. In other words, the contents of the "``output_netcdf.nc``" file will never be directly returned as -response when using the :ref:`proc_op_job_outputs` endpoint, and will always use the ``document``/``minimal`` links. - -Furthermore, because this response nests the items under ``outputs``, other information can be returned, -such as relevant ``links`` -with references to :ref:`proc_op_job_inputs`, :ref:`proc_op_job_logs`, :ref:`Job Status `, -or the source :ref:`Process Description ` that produced returned :term:`Job` outputs. 
- -The :ref:`proc_op_job_outputs` endpoint can also receive additional query parameters, -such as ``schema=OGC+strict``, which -allows it to return contents formatted slightly differently, to imitate the :term:`JSON` mapping representation +The :ref:`proc_op_job_outputs` endpoint can receive additional query parameters, +such as ``schema=OGC+strict`` (see :py:class:`weaver.processes.constants.JobInputsOutputsSchema` for other values), +which allows it to return contents formatted slightly differently, to imitate the :term:`JSON` mapping representation (rather than the array) used by the :ref:`proc_exec_results` endpoint as if ``response=document`` was specified during submission of the :term:`Process` execution. However, this :term:`JSON` mapping will still employ a -nested ``outputs`` property, as presented below, in order to allow additional ``links`` information. +nested ``outputs`` property, as presented below. .. literalinclude:: ../examples/job_outputs_mapping.json :language: json + :caption: :term:`Job` Outputs Response with Mapping Representation + :name: job-outputs-mapping + +Because these responses nest the items under ``outputs`` (in contrast to :ref:`proc_op_job_results` +returning ``{outputID}`` directly at the root), other information can be returned, such as relevant ``links`` +with references to :ref:`proc_op_job_inputs`, :ref:`proc_op_job_logs`, :ref:`Job Status `, +or the source :ref:`Process Description ` that produced the returned :term:`Job` outputs. + +In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents +of :ref:`proc_op_job_results` will be very similar to the above :ref:`Output Mapping ` contents, +but with respective ``{outputID}`` returned directly at the root, instead of nesting them under ``outputs``.
+On the other hand, a :term:`Job` submitted with ``response=raw`` or ``Prefer: return=representation`` can produce +many alternative content variations (see :ref:`proc_exec_results`) to respect :term:`OGC` compliance requirements, +according to the number of requested ``outputs``, submitted request parameters, and the respective :term:`Media-Type`, +schema or literal data of each output. +For this reason, the :ref:`proc_op_job_outputs` endpoint will always provide all data and file references in the +response body using the minimal representation as shown by the above :term:`JSON` examples, no matter which request +parameters were originally submitted to execute the :term:`Job`. +In other words, the contents of the "``output_netcdf.nc``" file will never be directly +returned inline/by-value in the :term:`JSON` response when using the :ref:`proc_op_job_outputs` endpoint, +and will always use the ``document``/``minimal`` file links. This is done to offer a simplified data access mechanism +without having to deal with all possible combinations of data representations potentially returned +by :ref:`proc_exec_results`. .. _proc_op_job_results: @@ -2040,8 +2051,13 @@ Job Results ~~~~~~~~~~~ This corresponds to the :term:`OGC API - Processes` compliant endpoint, using the |results-req| request. -Contrary to :ref:`proc_op_job_outputs`, where the :term:`JSON` representation is always enforced, this endpoint -will respond according to the submitted :term:`Job` parameters, as described in :ref:`proc_exec_results`. +Contrary to :ref:`proc_op_job_outputs`, where the :term:`JSON` ``document`` representation is always enforced, +this endpoint will respond according to the submitted :term:`Job` parameters. + +.. seealso:: + This section presents examples of the most typical result combinations. + For an exhaustive list of expected content results and resolution behaviors, + according to submitted execution parameters, refer to the :ref:`proc_exec_results` section.
In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal`` with multiple outputs, the contents will typically be a :term:`JSON` mapping representation, where each *requested* ``{outputID}`` can be @@ -2057,16 +2073,19 @@ to :ref:`proc_exec_results`. An example of such results is presented below. The ``{outputID}`` are returned at the root of the contents using this representation, contrary to the :ref:`proc_op_job_outputs` endpoint that nests them under ``outputs``. -When a :term:`Job` is executed with ``response=raw``, or when the *requested* ``outputs``[#n_out]_ consisted only of +When a :term:`Job` is executed with ``response=raw``, or when the *requested* ``outputs`` [#outN]_ consisted only of a single ``{outputID}``, the returned data will directly be the contents of the produced file, or literal value, as applicable according to the ``schema`` definition of the corresponding output in the :ref:`Process Description `. The following result will be obtained if any of the following conditions are encountered: -1. The result is a :ref:`File Reference ` and the ``Prefer: return=representation`` header was used -2. The result is a :ref:`File Reference ` and the ``transmissionMode: value`` parameter was used -3. The result is a literal data type, whether or not ``Prefer``/``transmissionMode`` were specified with above values. +1. The result is a complex :ref:`File Reference ` + and the ``Prefer: return=representation`` header was used +2. The result is a complex :ref:`File Reference ` + and the ``transmissionMode: value`` parameter was used +3. The result is a literal data type, + whether or not ``Prefer``/``transmissionMode`` were specified with above values. .. 
literalinclude:: ../examples/job_results_raw_single_data.http :language: http @@ -2075,9 +2094,12 @@ The following result will be obtained if any of the following conditions are enc The following result will be obtained if any of the following conditions are encountered: -1. The result is a :ref:`File Reference ` and the ``Prefer: return=minimal`` header was used -2. The result is a :ref:`File Reference ` and the ``transmissionMode: reference`` parameter was used -3. The result is a literal data type, and any above ``Prefer``/``transmissionMode`` value is *explicitly* requested. +1. The result is a complex :ref:`File Reference ` + and the ``Prefer: return=minimal`` header was used +2. The result is a complex :ref:`File Reference ` + and the ``transmissionMode: reference`` parameter was used +3. The result is a literal data type, + and any above ``Prefer``/``transmissionMode`` value is *explicitly* requested. .. literalinclude:: ../examples/job_results_raw_single_ref.http :language: http @@ -2085,10 +2107,10 @@ The following result will be obtained if any of the following conditions are enc :name: job-results-raw-single-ref When the number of *requested* ``outputs`` [#outN]_ is more than one, the response will either be -multipart contents or similar to the first ``document`` :term:`JSON` structure, accordingly to the -negotiated ``Content-Type``. An example of a multipart representation is shown below. +multipart contents or similar to the ``document`` :term:`JSON` structure :ref:`example `, +accordingly to the negotiated ``Accept`` content header. An example of a multipart representation is shown below. The resolution of the nested outputs within each boundary, either by value or reference, will resolve -for each respective output according to the same rules combinations specified above for single output. +for each respective output according to the same rule combinations specified above for single output. .. 
literalinclude:: ../examples/job_results_raw_multi.http :language: mime @@ -2104,7 +2126,8 @@ provided directly. If the ``transmissionMode: value`` under ``output-file`` in the *requested* ``outputs`` [#outN]_ or ``Prefer: return=representation`` were used, the data of the file would be directly included inline within the -response instead of using ``Content-Location``, similarly to the :ref:`job-results-raw-single-data` example, +response instead of using ``Content-Location``, similarly to +the :ref:`Single Output Value ` example, but nested within its respective ``Content-ID: output-file`` multipart bounds. .. _proc_op_job_inputs: @@ -2112,15 +2135,19 @@ but nested within its respective ``Content-ID: output-file`` multipart bounds. Job Inputs ~~~~~~~~~~~ -In order to better understand the parameters that were submitted during :term:`Job` creation, the |inputs-req|_ -can be employed. This will return both the data and reference ``inputs`` that were submitted, as well as -the *requested* ``outputs`` to retrieve any relevant ``transmissionMode``, ``format``, etc. parameters -that where specified during submission of the :ref:`proc_exec_body`. - +In order to better understand the parameters that were *originally* submitted during :term:`Job` creation, +the |inputs-req|_ can be employed. This will return both the data and reference ``inputs`` that were submitted, +as well as the *requested* ``outputs`` [#outN]_ to retrieve any relevant ``transmissionMode``, ``format``, etc. +parameters that were specified during submission of the :ref:`proc_exec_body`. +For convenience, this endpoint also returns relevant ``links`` applicable for the requested :term:`Job`. -.. fixme: -.. todo:: add job inputs JSON example +.. literalinclude:: ../examples/job_inputs.json + :language: json + :caption: Example :term:`JSON` Response from :term:`Job` Inputs + :name: job-inputs +.. note:: + The ``links`` presented above are not an exhaustive list to keep the example relatively small. ..
_proc_op_job_error: .. _proc_op_job_exceptions: @@ -2167,7 +2194,8 @@ Note again that the more the :term:`Process` is verbose, the more tracking will .. literalinclude:: ../../weaver/wps_restapi/examples/job_logs.json :language: json - + :caption: Example :term:`JSON` Representation of :term:`Job` Logs Response + :name: job-logs .. note:: All endpoints to retrieve any of the above information about a :term:`Job` can either be requested directly From dbc6130df73a541741f6292bb92b9758dab09691 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 17:57:23 -0400 Subject: [PATCH 11/75] add doc details about content link headers --- .../examples/job_results_raw_single_data.http | 1 + docs/examples/job_results_raw_single_ref.http | 8 +-- docs/source/processes.rst | 50 +++++++++---------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/docs/examples/job_results_raw_single_data.http b/docs/examples/job_results_raw_single_data.http index 74a8ea20d..9d7e453f3 100644 --- a/docs/examples/job_results_raw_single_data.http +++ b/docs/examples/job_results_raw_single_data.http @@ -1,5 +1,6 @@ HTTP/1.1 200 OK Host: weaver.example.com +Content-ID: output Content-Type: application/x-netcdf diff --git a/docs/examples/job_results_raw_single_ref.http b/docs/examples/job_results_raw_single_ref.http index f6ae1da40..a82be4a77 100644 --- a/docs/examples/job_results_raw_single_ref.http +++ b/docs/examples/job_results_raw_single_ref.http @@ -1,5 +1,7 @@ -HTTP/1.1 200 OK +HTTP/1.1 204 No Content Host: weaver.example.com -Content-Type: application/x-netcdf Content-Length: 0 -Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc +Content-Type: application/x-netcdf +Content-ID: output +Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output/output_netcdf.nc +Link: ; rel="output"; type="application/x-netcdf" diff --git a/docs/source/processes.rst 
b/docs/source/processes.rst index 6874776b8..086df69a2 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -706,12 +706,6 @@ omit this output from the produced results. .. fixme: .. todo:: - For the time being, because only ``reference`` representation is offered for produced output files, this - filtering is not implemented as it offers no additional advantage for files accessed directly with their - distinct links. - This could be added later if ``Multipart`` raw data representation is required. - Please |submit-issue|_ to request this feature if it is relevant for your use-cases. - Filtering of ``outputs`` not implemented (everything always available). https://github.com/crim-ca/weaver/issues/380 @@ -920,8 +914,9 @@ Following is a detailed listing of the expected response structure according to The actual HTTP *Response* returned from the execution endpoint will depend on the requested :ref:`proc_exec_mode`. A :term:`Job` successfully resolved with `synchronous` execution will return the *Results* shown in the table - directly with a *HTTP 200 OK* status, whereas an `asynchronous` execution will always return a - :ref:`Job Status ` *Response* with *HTTP 201 Created* or *HTTP 202 Accepted* status. + directly with a *HTTP 200 OK* or *HTTP 204 No Content* status (as applicable), whereas an `asynchronous` execution + will always return a :ref:`Job Status ` *Response* with *HTTP 201 Created* or *HTTP 202 Accepted* + status (accordingly if the :term:`Job` started immediately or is still pending). In the case of a successfully completed `asynchronous` execution, a subsequent :ref:`Results Request ` using the :term:`Job` ``Location`` is needed to obtain the *Results* presented in the above table. 
@@ -1003,23 +998,14 @@ Following is a detailed listing of the expected response structure according to for cases where *every requested output* uses the default matching the specified or resolved ``transmissionMode`` (i.e.: ``value`` for literal data, ``reference`` for complex data). -.. fixme: - reword below, above table results identical for Prefer/mode sync/async, - except that returned directly for sync, and via results endpoint in async - describe that sync can still access results afterward, as if async was used - -The ``response`` parameter defines how to return the results produced by the :term:`Process`. -When ``response=document``, regardless of ``mode=async`` or ``mode=sync``, and regardless of requested -outputs ``transmissionMode=value`` or ``transmissionMode=reference``, the results will be returned in -a :term:`JSON` format containing either literal values or URL references to produced files. If ``mode=async``, -this results *document* is obtained with |results-req|_ request, while ``mode=sync`` returns it directly. -When ``response=raw``, the specific contents (type and quantity), HTTP ``Link`` headers or a mix of those components -depends both on the number of available :term:`Process` outputs, which ones were requested, and how they were -requested (i.e.: ``transmissionMode``). It is also possible that further content negotiation gets involved -accordingly to the ``Accept`` header and available ``Content-Type`` of the outputs if multiple formats are supported -by the :term:`Process`. For more details regarding those combination, the official -|ogc-api-proc-exec-responses-sync|_ and |ogc-api-proc-exec-responses-async|_ should be employed as reference. +In summary, the ``Prefer`` and ``response`` parameters define how to return the results produced by the :term:`Process`. +The ``Prefer`` header is also used by |oap| v2.0 to control how the results are encoded, whereas v1.0 relies on a +separate ``transmissionMode`` parameter. 
By reducing the number of parameters involved, v2.0 makes the request easier +to submit with a single header (also used to indicate the :ref:`proc_exec_mode`), but limits certain representation +combinations only possible with v1.0. +.. seealso:: + Examples of typical contents for many of the combinations are provided under the :ref:`proc_op_job_results` section. .. _proc_exec_steps: @@ -2106,8 +2092,22 @@ The following result will be obtained if any of the following conditions are enc :caption: Results for a single output returned directly by reference :name: job-results-raw-single-ref +When results are resolved as ``transmissionMode: reference``, either using ``Prefer: return=minimal`` +or ``response: raw`` parameters, leading to the creation of a :ref:`File Reference ` link +directly returned as above (rather than embedded in a :ref:`Document Result `), +the generated reference will be reported using a HTTP ``Link`` header, for each applicable output, in order to fulfill +|oap| v1.0 `Requirement 30 `_. +However, given that such ``Link`` headers can result in conflicting ``rel: {outputID}`` with other ``Link`` +entries found in the response, and require additional parsing of the value to extract the :term:`URL`, a combination +of ``Content-ID``, ``Content-Type`` and ``Content-Location`` will also be provided. + +.. note:: + For cases where an output would represent an array of :ref:`File References `, returned ``Link`` + headers for each of these links will employ ``rel: "{outputID}.{index}"`` with their respective ``index`` from + the array. + When the number of *requested* ``outputs`` [#outN]_ is more than one, the response will either be -multipart contents or similar to the ``document`` :term:`JSON` structure :ref:`example `, +multipart contents or similar to the :ref:`Document Result ` contents, accordingly to the negotiated ``Accept`` content header. An example of a multipart representation is shown below.
The resolution of the nested outputs within each boundary, either by value or reference, will resolve for each respective output according to the same rule combinations specified above for single output. From a8cd906023672b7245c998d4f9d5d89d02627532 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 16 Sep 2024 19:50:26 -0400 Subject: [PATCH 12/75] setup tests for job exec mode/results --- .../EchoResultsTester/deploy.yml | 8 ++ .../EchoResultsTester/package.cwl | 21 ++++ tests/functional/test_wps_package.py | 49 +++++++- tests/test_utils.py | 112 +++++++++++++----- weaver/execute.py | 5 + weaver/utils.py | 12 +- weaver/wps_restapi/swagger_definitions.py | 2 +- 7 files changed, 174 insertions(+), 35 deletions(-) create mode 100644 tests/functional/application-packages/EchoResultsTester/deploy.yml create mode 100644 tests/functional/application-packages/EchoResultsTester/package.cwl diff --git a/tests/functional/application-packages/EchoResultsTester/deploy.yml b/tests/functional/application-packages/EchoResultsTester/deploy.yml new file mode 100644 index 000000000..6aad038e8 --- /dev/null +++ b/tests/functional/application-packages/EchoResultsTester/deploy.yml @@ -0,0 +1,8 @@ +# YAML representation supported by WeaverClient +processDescription: + process: + version: "1.0" # must be string, avoid interpretation as float +executionUnit: + # note: This does not work by itself! The test suite injects the file dynamically. 
+ - href: "tests/functional/application-packages/EchoResultsTester/package.cwl" +deploymentProfileName: "http://www.opengis.net/profiles/eoc/dockerizedApplication" diff --git a/tests/functional/application-packages/EchoResultsTester/package.cwl b/tests/functional/application-packages/EchoResultsTester/package.cwl new file mode 100644 index 000000000..1f54a8f59 --- /dev/null +++ b/tests/functional/application-packages/EchoResultsTester/package.cwl @@ -0,0 +1,21 @@ +cwlVersion: "v1.0" +class: CommandLineTool +baseCommand: echo +requirements: + DockerRequirement: + dockerPull: "debian:stretch-slim" +inputs: + message: + type: string + inputBinding: + position: 1 +outputs: + output_reference: + type: File + outputBinding: + glob: "stdout.log" + output_data: + type: string + outputBinding: + outputEval: $(inputs.message) +stdout: stdout.log diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index beedb58ae..2f0e6de64 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -43,7 +43,13 @@ mocked_wps_output, setup_aws_s3_bucket ) -from weaver.execute import ExecuteCollectionFormat, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ( + ExecuteCollectionFormat, + ExecuteMode, + ExecuteResponse, + ExecuteReturnPreference, + ExecuteTransmissionMode +) from weaver.formats import ( EDAM_MAPPING, EDAM_NAMESPACE, @@ -3512,6 +3518,44 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_ assert results + def test_execute_single_output_prefer_header_return_representation(self): + body = self.retrieve_payload("EchoResultsTester", "deploy", local=True) + desc = self.deploy_process(body) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}" + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + raise NotImplementedError # FIXME: implement + + def 
test_execute_single_output_prefer_header_return_minimal(self): + raise NotImplementedError # FIXME: implement + + def test_execute_single_output_response_raw_value(self): + raise NotImplementedError # FIXME: implement + + def test_execute_single_output_response_raw_reference(self): + raise NotImplementedError # FIXME: implement + + def test_execute_multi_output_prefer_header_return_representation(self): + raise NotImplementedError # FIXME: implement + + def test_execute_multi_output_prefer_header_return_minimal(self): + raise NotImplementedError # FIXME: implement + + def test_execute_multi_output_response_raw_value(self): + raise NotImplementedError # FIXME: implement + + def test_execute_multi_output_response_raw_reference(self): + raise NotImplementedError # FIXME: implement + + def test_execute_multi_output_response_raw_mixed(self): + raise NotImplementedError # FIXME: implement + + # FIXME: implement other variations as well... see doc 'Execution Results' combinations + @pytest.mark.functional class WpsPackageAppWithS3BucketTest(WpsConfigBase, ResourcesUtil): @@ -3666,13 +3710,12 @@ def test_execute_application_package_process_with_bucket_results(self): assert not os.path.exists(os.path.join(wps_outdir, wps_uuid, out_file)) assert os.path.isfile(os.path.join(wps_outdir, f"{job_id}.xml")) - # FIXME: implement @pytest.mark.skip(reason="OAS execute parse/validate values not implemented") def test_execute_job_with_oas_validation(self): """ Process with :term:`OpenAPI` I/O definitions validates the schema of the submitted :term:`JSON` data. 
""" - raise NotImplementedError + raise NotImplementedError # FIXME: implement @mocked_aws_config @mocked_aws_s3 diff --git a/tests/test_utils.py b/tests/test_utils.py index 9d086a845..6d7f1cfd9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -23,6 +23,7 @@ from beaker.cache import cache_region from mypy_boto3_s3.literals import RegionName from pyramid.httpexceptions import ( + HTTPBadRequest, HTTPConflict, HTTPCreated, HTTPError as PyramidHTTPError, @@ -47,7 +48,7 @@ setup_test_file_hierarchy ) from weaver import xml_util -from weaver.execute import ExecuteControlOption, ExecuteMode +from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteReturnPreference from weaver.formats import ContentEncoding, ContentType, repr_json from weaver.status import JOB_STATUS_CATEGORIES, STATUS_PYWPS_IDS, STATUS_PYWPS_MAP, Status, StatusCompliant, map_status from weaver.utils import ( @@ -2121,37 +2122,92 @@ def test_parse_kvp(query, params, expected): assert result == expected -@pytest.mark.parametrize(["headers", "support", "expected"], [ - # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) - ({}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.SYNC, 10, {})), - ({"Prefer": ""}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.SYNC, 10, {})), - ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.SYNC, 4, {"Preference-Applied": "wait=4"})), - ({"Prefer": "wait=20"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.ASYNC, None, {})), # larger than max time - # only 
supported async (enforced) - original behaviour - ({}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {})), - ({"Prefer": ""}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {})), - ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {})), -]) -def test_prefer_header_execute_mode(headers, support, expected): +@pytest.mark.parametrize( + ["headers", "support", "expected", "extra_prefer"], + [ + # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) + ({}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.SYNC, 10, {}), ""), + # only supported async (enforced) - original behaviour + ({}, [ExecuteControlOption.ASYNC], (ExecuteMode.ASYNC, None, {}), ""), + ] + + [ + (_headers, _support, _expected, _extra) + for (_headers, _support, _expected), _extra + in itertools.product( + [ + # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) + ({"Prefer": ""}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.SYNC, 10, {})), + ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.SYNC, 4, {"Preference-Applied": "wait=4"})), + ({"Prefer": "wait=20"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.ASYNC, None, {})), # larger than max 
time + # only supported async (enforced) - original behaviour + ({"Prefer": ""}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {})), + ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {})), + + ], + [ + "", + f"return={ExecuteReturnPreference.MINIMAL}", + f"return={ExecuteReturnPreference.REPRESENTATION}" + # FIXME: + # Support with added ``Prefer: handling=strict`` or ``Prefer: handling=lenient`` + # https://github.com/crim-ca/weaver/issues/701 + ] + ) + ] +) +def test_prefer_header_execute_mode(headers, support, expected, extra_prefer): + if extra_prefer and "Prefer" in headers: + headers["Prefer"] += f", {extra_prefer}" if headers["Prefer"] else extra_prefer result = parse_prefer_header_execute_mode(headers, support) assert result == expected +@pytest.mark.parametrize( + ["headers", "expected"], + [ + # 1st variant is considered as 1 Prefer header with all values supplied simultaneously + # 2nd variant is considered as 2 Prefer headers, each with their respective value + # (this is because urllib, under the hood, concatenates the list of header-values using ';' separator) + ({"Prefer": "respond-async, wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "respond-async; wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ] +) +def test_parse_prefer_header_execute_mode_flexible(headers, expected): + """ + Ensure that the ``Prefer`` header supplied multiple times (allowed by :rfc:`7240`) is handled correctly. 
+ """ + result = parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC]) + assert result == expected + + +@pytest.mark.parametrize("prefer_header", [ + "wait=10s", + "wait=3.1416", + "wait=yes", + "wait=1,2,3", # technically, gets parsed as 'wait=1' (valid) and other '2', '3' parameters on their own + "wait=1;2;3", + "wait=1, wait=2", + "wait=1; wait=2", +]) +def test_parse_prefer_header_execute_mode_invalid(prefer_header): + headers = {"Prefer": prefer_header} + with pytest.raises(HTTPBadRequest): + parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC]) + + @pytest.mark.parametrize(["number", "binary", "unit", "expect"], [ (1.234, False, "B", "1.234 B"), (10_000_000, False, "B", "10.000 MB"), diff --git a/weaver/execute.py b/weaver/execute.py index 89fc2d66a..dfb7df14b 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -25,6 +25,11 @@ def values(cls): return [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] +class ExecuteReturnPreference(Constants): + MINIMAL = "minimal" + REPRESENTATION = "representation" + + class ExecuteResponse(Constants): RAW = "raw" DOCUMENT = "document" diff --git a/weaver/utils.py b/weaver/utils.py index 56ab84be6..246d69a75 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -709,7 +709,7 @@ def parse_kvp(query, # type: str """ Parse key-value pairs using specified separators. - All values are normalized under a list, whether their have an unique or multi-value definition. + All values are normalized under a list, whether they have a unique or multi-value definition. When a key is by itself (without separator and value), the resulting value will be an empty list. 
When :paramref:`accumulate_keys` is enabled, entries such as ``{key}={val};{key}={val}`` will be joined together @@ -851,6 +851,10 @@ def parse_prefer_header_execute_mode( wait = wait_max if "wait" in params: try: + if any(param.isnumeric() for param in params): + # 'wait=x,y,z' parsed as 'wait=x' and 'y' / 'z' parameters on their own + # since 'wait' is the only referenced that users integers, it is guaranteed to be a misuse + raise ValueError("Invalid 'wait' with comma-separated values.") if not len(params["wait"]) == 1: raise ValueError("Too many values.") wait = params["wait"][0] @@ -881,7 +885,7 @@ def parse_prefer_header_execute_mode( if auto == mode: if auto == ExecuteMode.ASYNC: applied_preferences.append("respond-async") - if wait: + if wait and "wait" in params: applied_preferences.append(f"wait={wait}") # /rec/core/process-execute-honor-prefer (A: async & B: wait) # https://datatracker.ietf.org/doc/html/rfc7240#section-3 @@ -895,8 +899,10 @@ def parse_prefer_header_execute_mode( if len(supported_modes) == 2: if auto == ExecuteMode.ASYNC: return ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"} - if wait: + if wait and "wait" in params: return ExecuteMode.SYNC, wait, {"Preference-Applied": f"wait={wait}"} + if wait: # default used, not a supplied preference + return ExecuteMode.SYNC, wait, {} return ExecuteMode.ASYNC, None, {} diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index df4777b96..9d8d085b5 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -7113,7 +7113,7 @@ class PreferenceAppliedHeader(ExtendedSchemaNode): description = "Applied preferences from submitted 'Prefer' header after validation." 
name = "Preference-Applied" schema_type = String - example = "wait=10s, respond-async" + example = "wait=10, respond-async" class LocationHeader(URL): From 3c1e4cae5c6b22381d9a698f7af54f161ab9807b Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 17 Sep 2024 14:58:12 -0400 Subject: [PATCH 13/75] fix docs invalid combinations of prefer/response + more docs precisions about results resolution --- docs/source/processes.rst | 68 +++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 086df69a2..96e949508 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -832,17 +832,17 @@ Following is a detailed listing of the expected response structure according to +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |none| | |none| | |none| | 1 | |res-accept| |res-fmt-warn|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ - | ``representation`` | ``raw`` | |none| | 1 | - |res-accept| | - | | | | | - |res-auto| [#resValRef]_ | + | |na| | ``raw`` | |none| | 1 | - |res-accept| | + | [#resPreferReturn]_ | | | | - |res-auto| [#resValRef]_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``representation`` | ``raw`` | ``value`` | 1 | - |res-accept| | - | [#resPreferReturn]_ | | | (literal) | - |res-data|_ | + | | | | (literal) | - |res-data|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ - | ``representation`` | ``raw`` | ``reference`` | 1 | - |res-accept| | + | |na| | ``raw`` | ``reference`` | 1 | - |res-accept| | | [#resPreferReturn]_ | | | (complex) | - |res-ref|_ | 
+---------------------+--------------+---------------+-----------+-------------------------------------------------+ - | |na| | ``raw`` | ``value`` | 1 | - |res-accept| | - | [#resPreferReturn]_ | | | (complex) | - |res-data|_ | + | ``representation`` | ``raw`` | ``value`` | 1 | - |res-accept| | + | | | | (complex) | - |res-data|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``reference`` | 1 | - |res-accept| | | [#resPreferReturn]_ | | | (literal) | - |res-ref|_ | @@ -851,14 +851,22 @@ Following is a detailed listing of the expected response structure according to | | | | | content by default [#resCTypeMulti]_ | | | | | | - otherwise, |res-accept| |res-fmt-warn|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ - | ``representation`` | ``raw`` | |none| | >1 | - :ref:`Multipart ` | - | | | | | content [#resCTypeMulti]_ | + | |na| | ``raw`` | |none| | >1 | - :ref:`Multipart ` | + | [#resPreferReturn]_ | | | | content [#resCTypeMulti]_ | | | | | | - |res-auto| [#resValRef]_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``value`` | >1 | - :ref:`Multipart ` | - | [#resPreferReturn]_ | | *or* | | content [#resCTypeMulti]_ | - | | | ``reference`` | | - using embedded content part data/link | - | | | | | as requested by |out-mode| [#resValRefForce]_ | + | [#resPreferReturn]_ | | *and* | | content [#resCTypeMulti]_ | + | | | ``reference`` | | - using embedded content parts with data/link | + | | | (``mixed``) | | as requested by |out-mode| [#resValRefForce]_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``representation`` | ``raw`` | ``value`` | >1 | - :ref:`Multipart ` | + | | | (for *all*) | | content [#resCTypeMulti]_ | + | | | | | - using 
embedded content parts with data | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | |na| | ``raw`` | ``reference`` | >1 | - :ref:`Multipart ` | + | ``representation`` | | (for *all*) | | content [#resCTypeMulti]_ | + | | | | | - using embedded content parts with data | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |none| | ``document`` | |none| | |any| | - :ref:`Results ` | | | | | | content | @@ -869,11 +877,11 @@ Following is a detailed listing of the expected response structure according to | | | | | - |res-auto| [#resValRef]_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``minimal`` | ``document`` | ``value`` | |any| | - :ref:`Results ` | - | [#resPreferReturn]_ | | | (literal) | content | + | | | | (literal) | content | | | | | | - using data included inline | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``minimal`` | ``document`` | ``reference`` | |any| | - :ref:`Results ` | - | [#resPreferReturn]_ | | | (complex) | content | + | | | | (complex) | content | | | | | | - using file link reference | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``document`` | ``value`` | |any| | - :ref:`Results ` | @@ -904,7 +912,9 @@ Following is a detailed listing of the expected response structure according to simultaneously (although permitted), since they should be interchangeable in most situations. The table indicates both |oap| v1.0/v2.0 variations to illustrate which combinations lead to the **same result**. If a client happens to use both combination simultaneously, the body parameters will take precedence - over the ``Prefer`` header. + over the ``Prefer`` header for conflicting cases. 
This is in order to respect the fact that body parameters + are "*hard requirements*", whereas ``Prefer`` is a "*soft requirement*" (i.e.: a preference) that does not + necessarily need to be respected if the server cannot resolve the combination. .. |res-important| replace:: :sup:`(see: important note)` .. _res-important: @@ -962,14 +972,23 @@ Following is a detailed listing of the expected response structure according to the ``transmissionMode`` do not apply by definition. .. [#resCTypeMulti] - The data of the multiple outputs are simultaneously returned, but their encoding depend on the requested ``Accept`` - header. By default, the :ref:`Results ` structure encoded as :term:`JSON` is employed. - However, the :ref:`Results for Multiple Outputs ` example using ``multipart/related`` - contents could also be obtained if requested, or as established by using other parameter combinations. - Other content representations, such as packaging the results under a single ZIP archive, could also be returned - if requested. However, alternate representations might not allow some ``transmissionMode`` combinations according - to their logical representation (e.g.: a ZIP archive could refuse ``transmissionMode: reference`` to only allow - files to be directly included in the ZIP, rather than link references to them). + When data of multiple outputs are simultaneously returned, their encoding depends on the requested ``Accept`` header. + By default (when neither ``Prefer`` nor ``response`` is provided to establish the contents structure to employ), + the :ref:`Results Document ` encoded as :term:`JSON` is employed. A similar + representation using other encoding (e.g.: :term:`XML` or :term:`YAML`) could be returned if requested by + the ``Accept`` header. + + For every other case where a return ``representation`` or ``raw`` results are explicitly requested, + the :ref:`Multipart Results ` structure using ``multipart/related`` contents + is employed by default.
The representation of each part (as literal data or link reference [#resValRef]_) + is established by the ``transmissionMode`` parameter combinations, or as applicable according to the ``Accept`` + and the ``Prefer: return`` header. + + Other content representations, such as packaging the results under a single ZIP archive, could also be + returned if requested with the ``Accept`` header and supported according to the :term:`Process` description. + However, alternate representations might not allow certain ``transmissionMode`` combinations according to + their logical representation (e.g.: a ZIP archive could refuse ``transmissionMode: reference`` to only allow + files to be directly included within the ZIP, rather than link references to them). .. [#resValRef] Although the general "*response structure*" is established by other parameters in this case, whether respective @@ -994,8 +1013,9 @@ Following is a detailed listing of the expected response structure according to Using only the |oap| v2.0 ``Prefer: return`` header parameter, it is not always possible to *enforce* every result combination as when using |oap| v1.0 parameters. More specifically, it is not possible to replicate cases where a requested output specifies a ``transmissionMode`` using an *opposite* representation from its - "*default minimum*" representation of literal or complex data. However, ``Prefer: return`` header is equivalent - for cases where *every requested output* uses the default matching the specified or resolved ``transmissionMode`` + "*default*" ``minimal`` or ``representation`` contents of literal or complex data. However, ``Prefer: return`` + header is equivalent for cases where *every requested output* is returned with a representation that matches + the default data type representation specified by ``transmissionMode`` (i.e.: ``value`` for literal data, ``reference`` for complex data).
In summary, the ``Prefer`` and ``response`` parameters define how to return the results produced by the :term:`Process`. From 2b57583b3b0174004a1c576851e318374ae28afd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 17 Sep 2024 14:59:04 -0400 Subject: [PATCH 14/75] =?UTF-8?q?[wip]=C2=A0define=20test=20combinations?= =?UTF-8?q?=20for=20some=20prefer/return=20combinations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../EchoResultsTester/package.cwl | 26 +++- tests/functional/test_wps_package.py | 121 +++++++++++++++++- tests/test_formats.py | 4 +- weaver/formats.py | 7 +- weaver/wps_restapi/swagger_definitions.py | 6 +- 5 files changed, 145 insertions(+), 19 deletions(-) diff --git a/tests/functional/application-packages/EchoResultsTester/package.cwl b/tests/functional/application-packages/EchoResultsTester/package.cwl index 1f54a8f59..08d6ace3c 100644 --- a/tests/functional/application-packages/EchoResultsTester/package.cwl +++ b/tests/functional/application-packages/EchoResultsTester/package.cwl @@ -4,18 +4,34 @@ baseCommand: echo requirements: DockerRequirement: dockerPull: "debian:stretch-slim" + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - entryname: result.json + entry: | + {"data":"$(inputs.message)"} + - entryname: result.txt + entry: | + $(inputs.message) inputs: message: type: string inputBinding: position: 1 outputs: - output_reference: - type: File - outputBinding: - glob: "stdout.log" output_data: type: string outputBinding: outputEval: $(inputs.message) -stdout: stdout.log + output_text: + type: File + outputBinding: + glob: result.txt + format: "iana:text/plain" + output_json: + type: File + outputBinding: + glob: result.json + format: "iana:application/json" +$namespaces: + iana: "https://www.iana.org/assignments/media-types/" diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 2f0e6de64..8b28f951c 100644 
--- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -7,11 +7,14 @@ .. seealso:: - :mod:`tests.processes.wps_package`. """ +import inspect + import contextlib import copy import json import logging import os +import re import shutil import tempfile from inspect import cleandoc @@ -71,7 +74,7 @@ ) from weaver.processes.types import ProcessType from weaver.status import Status -from weaver.utils import fetch_file, get_any_value, get_path_kvp, load_file +from weaver.utils import fetch_file, get_any_value, get_path_kvp, load_file, parse_kvp from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location from weaver.wps_restapi import swagger_definitions as sd @@ -3519,16 +3522,49 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_ assert results def test_execute_single_output_prefer_header_return_representation(self): - body = self.retrieve_payload("EchoResultsTester", "deploy", local=True) - desc = self.deploy_process(body) + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}" + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {} # no 'transmissionMode' to auto-resolve 'value' from 'return=representation' + } } with contextlib.ExitStack() as stack: for mock_exec in mocked_execute_celery(): stack.enter_context(mock_exec) - raise NotImplementedError # FIXME: implement + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: 
[{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.APP_JSON) + outputs = self.app.get(f"/jobs/{job_id}/outputs") + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert results.text == output_json + assert outputs.json == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } def test_execute_single_output_prefer_header_return_minimal(self): raise NotImplementedError # FIXME: implement @@ -3540,7 +3576,68 @@ def test_execute_single_output_response_raw_reference(self): raise NotImplementedError # FIXME: implement def test_execute_multi_output_prefer_header_return_representation(self): - raise NotImplementedError # FIXME: implement + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + # no 'transmissionMode' to auto-resolve 'value' from 'return=representation' + # request multiple outputs, but not 'all', to test filter behavior at the same time + # use 1 expected as 'File' and 1 'string' literal to test conversion to raw 'value' + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = 
mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.MULTIPART_RELATED) + boundary = parse_kvp(results.content_type)["boundary"][0] + outputs = self.app.get(f"/jobs/{job_id}/outputs") + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.TEXT_PLAIN} + Content-ID: output_data + + test + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: output_json + + {output_json} + --{boundary}-- + """) + assert results.text == results_body + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } def test_execute_multi_output_prefer_header_return_minimal(self): raise NotImplementedError # FIXME: implement @@ -3554,6 +3651,18 @@ def test_execute_multi_output_response_raw_reference(self): def test_execute_multi_output_response_raw_mixed(self): raise NotImplementedError # FIXME: implement + def test_execute_multi_output_response_document_defaults(self): + """ + Test ``response: document`` with default ``transmissionMode`` resolutions for literal/complex outputs. 
+ """ + raise NotImplementedError # FIXME: implement + + def test_execute_multi_output_response_document_mixed(self): + """ + Test ``response: document`` with ``transmissionMode`` specified to force convertion of literal/complex outputs. + """ + raise NotImplementedError # FIXME: implement + # FIXME: implement other variations as well... see doc 'Execution Results' combinations diff --git a/tests/test_formats.py b/tests/test_formats.py index 26880f242..d381f61a7 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -232,7 +232,7 @@ def test_get_format(test_content_type, expected_content_type, expected_content_e [ f.ContentType.APP_OCTET_STREAM, f.ContentType.APP_FORM, - f.ContentType.MULTI_PART_FORM, + f.ContentType.MULTIPART_FORM, ] ) def test_get_format_media_type_no_extension(test_extension): @@ -289,7 +289,7 @@ def test_get_format_media_type_from_schema(test_format, expect_media_type): [ f.ContentType.APP_OCTET_STREAM, f.ContentType.APP_FORM, - f.ContentType.MULTI_PART_FORM, + f.ContentType.MULTIPART_FORM, ] ) ) diff --git a/weaver/formats.py b/weaver/formats.py index 4d516fa9e..0014bb823 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -116,7 +116,8 @@ class ContentType(Constants): IMAGE_GIF = "image/gif" IMAGE_PNG = "image/png" IMAGE_TIFF = "image/tiff" - MULTI_PART_FORM = "multipart/form-data" + MULTIPART_FORM = "multipart/form-data" + MULTIPART_RELATED = "multipart/related" TEXT_ENRICHED = "text/enriched" TEXT_HTML = "text/html" TEXT_PLAIN = "text/plain" @@ -447,12 +448,12 @@ class SchemaRole(Constants): ContentType.APP_DIR: "/", # force href to finish with explicit '/' to mark directory ContentType.APP_OCTET_STREAM: "", ContentType.APP_FORM: "", - ContentType.MULTI_PART_FORM: "", + ContentType.MULTIPART_FORM: "", } _CONTENT_TYPE_EXCLUDE = [ ContentType.APP_OCTET_STREAM, ContentType.APP_FORM, - ContentType.MULTI_PART_FORM, + ContentType.MULTIPART_FORM, ] _EXTENSION_CONTENT_TYPES_OVERRIDES = { ".text": ContentType.TEXT_PLAIN, # common 
alias to .txt, especially when using format query diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 9d8d085b5..11b46305d 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -766,8 +766,8 @@ class NoContent(ExtendedMappingSchema): class FileUploadHeaders(RequestHeaders): # MUST be multipart for upload content_type = ContentTypeHeader( - example=f"{ContentType.MULTI_PART_FORM}; boundary=43003e2f205a180ace9cd34d98f911ff", - default=ContentType.MULTI_PART_FORM, + example=f"{ContentType.MULTIPART_FORM}; boundary=43003e2f205a180ace9cd34d98f911ff", + default=ContentType.MULTIPART_FORM, description="Desired Content-Type of the file being uploaded.", missing=required) content_length = ContentLengthHeader(description="Uploaded file contents size in bytes.") content_disposition = ContentDispositionHeader(example="form-data; name=\"file\"; filename=\"desired-name.ext\"", @@ -7359,7 +7359,7 @@ class VaultUploadBody(ExtendedSchemaNode): schema_type = String description = "Multipart file contents for upload to the vault." 
examples = { - ContentType.MULTI_PART_FORM: { + ContentType.MULTIPART_FORM: { "summary": "Upload JSON file to vault as multipart content.", "value": EXAMPLES["vault_file_upload.txt"], } From a8ea2f338de8c89145c31dccbec362a6e4ad4bbb Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 17 Sep 2024 21:49:28 -0400 Subject: [PATCH 15/75] more docs clarifications about job outputs/results --- docs/examples/job_results_raw_multi.http | 8 +- .../examples/job_results_raw_single_data.http | 2 +- docs/examples/job_results_raw_single_ref.http | 2 +- docs/source/processes.rst | 132 ++++++++++-------- 4 files changed, 83 insertions(+), 61 deletions(-) diff --git a/docs/examples/job_results_raw_multi.http b/docs/examples/job_results_raw_multi.http index 34a807649..9ab72c85e 100644 --- a/docs/examples/job_results_raw_multi.http +++ b/docs/examples/job_results_raw_multi.http @@ -1,15 +1,15 @@ HTTP/1.1 200 OK Host: weaver.example.com -Content-Type: multipart/related; boundary=43003e2f205a180ace9cd34d98f911ff; type=application/x-netcdf +Content-Type: multipart/mixed; boundary=43003e2f205a180ace9cd34d98f911ff --43003e2f205a180ace9cd34d98f911ff Content-Type: application/x-netcdf -Content-ID: output-file -Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output_netcdf.nc +Content-ID: +Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output-file/output_netcdf.nc --43003e2f205a180ace9cd34d98f911ff Content-Type: text/plain -Content-ID: output-data +Content-ID: 3.1416 --43003e2f205a180ace9cd34d98f911ff-- diff --git a/docs/examples/job_results_raw_single_data.http b/docs/examples/job_results_raw_single_data.http index 9d7e453f3..5166dacd7 100644 --- a/docs/examples/job_results_raw_single_data.http +++ b/docs/examples/job_results_raw_single_data.http @@ -1,6 +1,6 @@ HTTP/1.1 200 OK Host: weaver.example.com -Content-ID: output +Content-ID: Content-Type: application/x-netcdf diff --git 
a/docs/examples/job_results_raw_single_ref.http b/docs/examples/job_results_raw_single_ref.http index a82be4a77..523ceb601 100644 --- a/docs/examples/job_results_raw_single_ref.http +++ b/docs/examples/job_results_raw_single_ref.http @@ -2,6 +2,6 @@ HTTP/1.1 204 No Content Host: weaver.example.com Content-Length: 0 Content-Type: application/x-netcdf -Content-ID: output +Content-ID: Content-Location: https://example.com/wpsoutputs/f93a15be-6e16-11ea-b667-08002752172a/output/output_netcdf.nc Link: ; rel="output"; type="application/x-netcdf" diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 96e949508..7e668bf02 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -18,6 +18,15 @@ .. |<=>| unicode:: 0x21D4 +.. |synchronous| replace:: *synchronous* +.. |synchronously| replace:: *synchronously* +.. |asynchronous| replace:: *asynchronous* +.. |asynchronously| replace:: *asynchronously* +.. _synchronous: processes.html#execution-mode +.. _synchronously: processes.html#execution-mode +.. _asynchronous: processes.html#execution-mode +.. _asynchronously: processes.html#execution-mode + ********** Processes ********** @@ -717,29 +726,29 @@ the following :ref:`proc_exec_mode` and :ref:`proc_exec_results` sections. Execution Mode ~~~~~~~~~~~~~~~~~~~~~ -In order to select how to execute a :term:`Process`, either `synchronously` or `asynchronously`, the ``Prefer`` header -should be specified. If omitted, `Weaver` defaults to `asynchronous` execution. To execute `asynchronously` explicitly, -``Prefer: respond-async`` should be used. Otherwise, the `synchronous` execution can be requested +In order to select how to execute a :term:`Process`, either |synchronously|_ or |asynchronously|_, the ``Prefer`` header +should be specified. If omitted, `Weaver` defaults to |asynchronous|_ execution. To execute |asynchronously|_ explicitly, +``Prefer: respond-async`` should be used. 
Otherwise, the |synchronous|_ execution can be requested with ``Prefer: wait=X`` where ``X`` is the duration in seconds to wait for a response. If no worker becomes available within that time, or if this value is greater than the ``weaver.execute_sync_max_wait`` setting (see :ref:`detail `), the :term:`Job` will -resume `asynchronously` and the response will be returned. Furthermore, `synchronous` and `asynchronous` execution of +resume |asynchronously|_ and the response will be returned. Furthermore, |synchronous|_ and |asynchronous|_ execution of a :term:`Process` can only be requested for corresponding ``jobControlOptions`` it reports as supported in its :ref:`Process Description `. It is important to provide the ``jobControlOptions`` parameter with applicable modes when :ref:`Deploying a Process ` to allow it to run as desired. By default, `Weaver` -will assume that deployed processes are only `asynchronous` to handle longer operations. +will assume that deployed processes are only |asynchronous|_ to handle longer operations. .. versionchanged:: 4.15 By default, every :ref:`proc_builtin` :term:`Process` can accept both modes. - All previously deployed processes will only allow `asynchronous` execution, as only this one was supported. + All previously deployed processes will only allow |asynchronous|_ execution, as only this one was supported. This should be reported in their ``jobControlOptions``. .. warning:: It is important to remember that the ``Prefer`` header is indeed a *preference*. If `Weaver` deems it cannot - allocate a worker to execute the task `synchronously` within a reasonable delay, it can enforce the `asynchronous` - execution. The `asynchronous` mode is also *prioritized* for running longer :term:`Job` submitted over the task + allocate a worker to execute the task |synchronously|_ within a reasonable delay, it can enforce the |asynchronous|_ + execution. 
The |asynchronous|_ mode is also *prioritized* for running longer :term:`Job` submitted over the task queue, as this allows `Weaver` to offer better availability for all requests submitted by its users. - The `synchronous` mode should be reserved only for very quick and relatively low computation intensive operations. + The |synchronous|_ mode should be reserved only for very quick and relatively low computation intensive operations. .. fixme: .. todo:: @@ -764,27 +773,27 @@ becomes available, it will pick any leftover queued :term:`Job` to execute it. with other services using the same standard. The ``mode`` field is deprecated and preserved only for backward compatibility purpose. -When requesting a `synchronous` execution, and provided a worker was available to pick and complete the task before +When requesting a |synchronous|_ execution, and provided a worker was available to pick and complete the task before the maximum ``wait`` time was reached, the final status will be directly returned. Therefore, the contents obtained this way will be identical to any following :ref:`Job Status ` request. If no worker is available, or if the worker that picked the :term:`Job` cannot complete it in time (either because it takes too long to execute or had -to wait on resources for too long), the :term:`Job` execution will automatically switch to `asynchronous` mode. +to wait on resources for too long), the :term:`Job` execution will automatically switch to |asynchronous|_ mode. -The distinction between an `asynchronous` or `synchronous` response when executing a :term:`Job` can be +The distinction between an |asynchronous|_ or |synchronous|_ response when executing a :term:`Job` can be observed in multiple ways. The easiest is with the HTTP status code of the response, 200 being for a :term:`Job` *entirely completed* synchronously, and 201 for a created :term:`Job` that should be :ref:`monitored ` asynchronously. Another method is to observe the ``"status"`` value. 
-Effectively, a :term:`Job` that is executed `asynchronously` will return status information contents, while -a `synchronous` :term:`Job` will return the results directly, along a ``Location`` header referring to the -equivalent contents returned by :ref:`GetStatus ` as in the case of `asynchronous` :term:`Job`. +Effectively, a :term:`Job` that is executed |asynchronously|_ will return status information contents, while +a |synchronous|_ :term:`Job` will return the results directly, along a ``Location`` header referring to the +equivalent contents returned by :ref:`GetStatus ` as in the case of |asynchronous|_ :term:`Job`. It is also possible to extract the ``Preference-Applied`` response header which will clearly indicate if the submitted ``Prefer`` header was respected (because it could be with available worker resources) or not. In general, this means that if the :term:`Job` submission request was not provided with ``Prefer: wait=X`` **AND** replied with the same ``Preference-Applied`` value, it is safe to assume `Weaver` decided to queue the :term:`Job` -for `asynchronous` execution. That :term:`Job` could be executed immediately, or at a later time, according to +for |asynchronous|_ execution. That :term:`Job` could be executed immediately, or at a later time, according to worker availability. -It is also possible that a ``failed`` :term:`Job`, even when `synchronous`, will respond with equivalent contents +It is also possible that a ``failed`` :term:`Job`, even when |synchronous|_, will respond with equivalent contents to the status location instead of results. This is because it is impossible for `Weaver` to return the result(s) as outputs would not be generated by the incomplete :term:`Job`. 
@@ -865,7 +874,7 @@ Following is a detailed listing of the expected response structure according to | | | | | - using embedded content parts with data | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``reference`` | >1 | - :ref:`Multipart ` | - | ``representation`` | | (for *all*) | | content [#resCTypeMulti]_ | + | [#resPreferReturn]_ | | (for *all*) | | content [#resCTypeMulti]_ | | | | | | - using embedded content parts with data | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |none| | ``document`` | |none| | |any| | - :ref:`Results ` | @@ -923,15 +932,15 @@ Following is a detailed listing of the expected response structure according to It is important not to confuse expected *Results* above with *Responses*. The actual HTTP *Response* returned from the execution endpoint will depend on the requested :ref:`proc_exec_mode`. - A :term:`Job` successfully resolved with `synchronous` execution will return the *Results* shown in the table - directly with a *HTTP 200 OK* or *HTTP 204 No Content* status (as applicable), whereas an `asynchronous` execution + A :term:`Job` successfully resolved with |synchronous|_ execution will return the *Results* shown in the table + directly with a *HTTP 200 OK* or *HTTP 204 No Content* status (as applicable), whereas an |asynchronous|_ execution will always return a :ref:`Job Status ` *Response* with *HTTP 201 Created* or *HTTP 202 Accepted* status (accordingly if the :term:`Job` started immediately or is still pending). - In the case of a successfully completed `asynchronous` execution, a + In the case of a successfully completed |asynchronous|_ execution, a subsequent :ref:`Results Request ` using the :term:`Job` ``Location`` is needed to obtain the *Results* presented in the above table. 
- Note that a `synchronous` execution can also + Note that a |synchronous|_ execution can also make use of the :ref:`Results Request ` operation to obtain the outputs again at a later time, to request alternate output representations, or retrieve additional :term:`Job` information such as logs and metadata. @@ -979,10 +988,15 @@ Following is a detailed listing of the expected response structure according to the ``Accept`` header. For every other case where a return ``representation`` or ``raw`` results are explicitly requested, - the :ref:`Multipart Results ` structure using ``multipart/related`` contents - is employed by default. The representation of each part (as literal data or link reference [#resValRef]_) + the :ref:`Multipart Results ` structure + using ``multipart`` contents (:rfc:`2046#section-5.1`) is employed by default. + The representation of each part (as literal data or link reference [#resValRef]_) is established by the ``transmissionMode`` parameter combinations, or as applicable according to the ``Accept`` - and the ``Prefer: return`` header. + and the ``Prefer: return`` headers. Alternatively to requesting ``representation`` or ``raw`` results, + the :ref:`Multipart Results ` structure *could* also be requested explicitly + using ``Accept: multipart/*`` or ``Accept: multipart/mixed``. However, this :term:`Media-Type` will only + be respected for the response's contents if the :term:`Job` is executed |synchronously|_, since |asynchronous|_ + execution **MUST** respond with a :ref:`Job Status `, which is typically encoded in :term:`JSON`. Other content representations, such as packaging the results under a single ZIP archive, could also be returned if requested with the ``Accept`` header and supported according to the :term:`Process` description. @@ -1039,7 +1053,7 @@ parametrization details, etc.), followed by ``running`` when effectively reachin :term:`Application Package` operation. 
This status will remain as such until the operation completes, either with ``succeeded`` or ``failed`` status. -At any moment during `asynchronous` execution, the :term:`Job` status can be requested using |status-req|_. Note that +At any moment during |asynchronous|_ execution, the :term:`Job` status can be requested using |status-req|_. Note that depending on the timing at which the user executes this request and the availability of task workers, it could be possible that the :term:`Job` be already in ``running`` state, or even ``failed`` in case of early problem detected. @@ -1342,7 +1356,7 @@ respected. - |aws_s3_obj_key_names|_ When using |vault_ref| references, the resulting file name will be obtained from the ``filename`` specified in -the ``Content-Disposition`` within the uploaded content of the ``multipart/form-data`` request. +the ``Content-Disposition`` within the uploaded content of the ``multipart/form-data`` (:rfc:`7578`) request. .. seealso:: - :ref:`vault_upload` @@ -2030,7 +2044,7 @@ nested ``outputs`` property, as presented below. :caption: :term:`Job` Outputs Response with Mapping Representation :name: job-outputs-mapping -Because these responses nests the items under ``outputs`` (in contrast to :ref:`proc_op_job_results` +Because these responses nest the results under ``outputs`` (in contrast to :ref:`proc_op_job_results` returning ``{outputID}`` directly at the root), other information can be returned, such as relevant ``links`` with references to :ref:`proc_op_job_inputs`, :ref:`proc_op_job_logs`, :ref:`Job Status `, or the source :ref:`Process Description ` that produced returned :term:`Job` outputs.
@@ -2038,18 +2052,25 @@ or the source :ref:`Process Description ` that produced return In the event of a :term:`Job` executed with ``response=document`` or ``Prefer: return=minimal``, the contents of :ref:`proc_op_job_results` will be very similar to the above :ref:`Output Mapping ` contents, but with respective ``{outputID}`` returned directly at the root, instead of nesting them under ``outputs``. -On the other hand, a :term:`Job` submitted with ``response=raw`` or ``Prefer: return=representation`` can produce -many alternative content variations (see :ref:`proc_exec_results`) to respect :term:`OGC` compliance requirements, -according to the number of requested ``outputs``, submitted request parameters, and the respective :term:`Media-Type`, -schema or literal data of each output. -For this reason, the :ref:`proc_op_job_outputs` endpoint will always provide all data and file references in the -response body using the minimal representation as shown by above :term:`JSON` examples, no matter which request -parameters where originally submitted to execute the :term:`Job`. -In other words, the contents of the "``output_netcdf.nc``" file will never be directly -returned inline/by-value in the :term:`JSON` response when using the :ref:`proc_op_job_outputs` endpoint, -and will always use the ``document``/``minimal`` file links. This is done to offer a simplified data access mechanism -without having to deal will all possible combinations of data representations potentially returned -by :ref:`proc_exec_results`. +On the other hand, a :term:`Job` submitted with ``response=raw``, ``Prefer: return=representation`` or other +combinations of ``Accept`` headers and ``transmissionMode`` parameters, can produce +many alternative content variations (see :ref:`proc_exec_results`) to respect :term:`OGC` compliance requirements. 
+The structure of contents received from :ref:`proc_op_job_results` responses can also surprisingly vary according to +the number of requested ``outputs``, the submitted request parameters, and the alternative :term:`Media-Type`, schema +or literal data supported by each respective output. +For this reason, the :ref:`proc_op_job_outputs` endpoint will **always** provide all data and links in the +response body using the ``minimal`` representation as shown by above :term:`JSON` examples, +**no matter which request parameters** were originally submitted to execute the :term:`Job`. +In other words, the contents of a complex :ref:`File Reference ` (such as the "|output_netcdf|_") +will never be directly returned inline/by-value in the :term:`JSON` response when using the :ref:`proc_op_job_outputs` +endpoint, and will always use the ``document``/``minimal`` file link. Similarly, a literal data value will never be +returned by link reference, nor be returned directly as the response contents. An output of literal data will +always have its value included inline in the :term:`JSON` document. This behavior is performed to offer a +simplified data access mechanism without having to deal with all possible combinations of data representations +potentially returned by :ref:`proc_exec_results`. + +.. |output_netcdf| replace:: ``output_netcdf.nc`` +.. _output_netcdf: processes.html#job-outputs-mapping .. _proc_op_job_results: @@ -2118,19 +2139,23 @@ directly returned as above (rather than embedded in a :ref:`Document Result `_. However, given that such ``Link`` headers can result into conflicting ``rel: {outputID}`` with other ``Link`` -entries found in the response, and require additional parsing of the value to extract the :term:`URL`, a combination -of ``Content-ID``, ``Content-Type`` and ``Content-Location`` will also be provided.
+entries found in the response, and that they require additional parsing of the value to extract the :term:`URL`, +a combination of ``Content-ID``, ``Content-Type`` and ``Content-Location`` will also be provided. .. note:: For cases where an output would represent an array of :ref:`File References `, returned ``Link`` headers for each of these links will employ ``rel: "{outputID}.{index}"`` with their respective ``index`` from the array. -When the number of *requested* ``outputs`` [#outN]_ is more than one, the response will either be -multipart contents or similar to the :ref:`Document Result ` contents, -accordingly to the negotiated ``Accept`` content header. An example of a multipart representation is shown below. + To respect :rfc:`2392` definitions, ``Content-ID`` will use pattern ``<{outputID}@{jobID}>`` as unique identifier, + and ``<{outputID}.{index}@{jobID}>`` in the case of an array of :ref:`File References `. + +When the number of *requested* ``outputs`` [#outN]_ is more than one, the response will +either be ``multipart`` contents (:rfc:`2046#section-5.1`) or similar to +the :ref:`Document Result ` contents, +according to the negotiated ``Accept`` content header. An example of a ``multipart`` representation is shown below. The resolution of the nested outputs within each boundary, either by value or reference, will resolve -for each respective output according to the same rule combinations specified above for single output. +for each respective output according to the same rule conditions specified above for single output. .. literalinclude:: ../examples/job_results_raw_multi.http :language: mime @@ -2140,15 +2165,12 @@ for each respective output according to the same rule combinations specified abo Note that, in the above response, the ``Content-Location`` is used for the ``output-file``, whereas the data is directly returned for the ``output-data``.
This is based on `Weaver` auto-resolving ``transmissionMode: reference`` for a :ref:`File Reference ` result, while using ``transmissionMode: value`` by default for literal -data types. This is equivalent to requesting the :term:`Job` execution with ``Prefer: return=minimal``, since the -most succinct *response contents* for a file is obtained by using a link reference, whereas literal data types can be -provided directly. - -If the ``transmissionMode: value`` under ``output-file`` in the *requested* ``outputs`` [#outN]_ -or ``Prefer: return=representation`` were used, the data of the file would be directly included inline within the -response instead of using ``Content-Location``, similarly to -the :ref:`Single Output Value ` example, -but nested within its respective ``Content-ID: output-file`` multipart bounds. +data types. This also assumes that ``response: raw`` was requested, and that no ``transmissionMode`` was specified. +If ``transmissionMode: value`` under ``output-file`` in the *requested* ``outputs`` [#outN]_ was used +(or alternatively, if ``Prefer: return=representation`` was specified), +the data of the file would be directly included inline within the response instead of using ``Content-Location``, +similarly to the :ref:`Single Output Value ` example, +but with its contents nested within its respective boundaries for the corresponding ``Content-ID``. ..
_proc_op_job_inputs: From c54f113db7acdb96bb6dcf528cb69e761848ede7 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 17 Sep 2024 21:49:59 -0400 Subject: [PATCH 16/75] [wip] more test combinations for job results/outputs responses --- tests/functional/test_wps_package.py | 920 +++++++++++++++++++++++++-- weaver/formats.py | 6 +- 2 files changed, 888 insertions(+), 38 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 8b28f951c..7b2fdb9e5 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3521,7 +3521,48 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_ assert results - def test_execute_single_output_prefer_header_return_representation(self): + def test_execute_single_output_prefer_header_return_representation_literal(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_data": {} # no 'transmissionMode' to auto-resolve 'value' from 'return=representation' + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == 
Status.SUCCEEDED + + job_id = status["jobID"] + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.TEXT_PLAIN) + assert results.text == "test" + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json == { + "output_data": "test", + } + + def test_execute_single_output_prefer_header_return_representation_complex(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) @@ -3555,46 +3596,607 @@ def test_execute_single_output_prefer_header_return_representation(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - assert results.content_type.startswith(ContentType.APP_JSON) + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.text == output_json + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_single_output_prefer_header_return_minimal_literal(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_data": {} # no 'transmissionMode' to auto-resolve 'value' from 'return=minimal' + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = 
f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.TEXT_PLAIN) + assert results.text == "test" + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json == { + "output_data": "test", + } + + def test_execute_single_output_prefer_header_return_minimal_complex(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {} # no 'transmissionMode' to auto-resolve 'reference' from 'return=minimal' + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = 
self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert results.status_code == 204, "No contents expected for minimal reference result." + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.text == output_json + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_single_output_response_raw_value_literal(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.RAW, + "inputs": { + "message": "test" + }, + "outputs": { + "output_data": {}, # should use 'transmissionMode: value' by default + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + results = self.app.get(f"/jobs/{job_id}/results") + assert 
results.content_type.startswith(ContentType.TEXT_PLAIN) + assert results.text == "test" + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + } + + def test_execute_single_output_response_raw_value_complex(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.RAW, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE}, + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == {"data": "test"} + outputs = self.app.get(f"/jobs/{job_id}/outputs") + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "value": output_json, + "type": ContentType.APP_JSON, + }, + } + + def test_execute_single_output_response_raw_reference_literal(self): + proc = 
"EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.RAW, + "inputs": { + "message": "test" + }, + "outputs": { + "output_data": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}, + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.status_code == 204, "No contents expected for single reference result." 
+ assert results.body == b"" + assert results.content_type.startswith(ContentType.TEXT_PLAIN) + assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_data/output.txt" + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + } + + def test_execute_single_output_response_raw_reference_complex(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.RAW, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {}, # should use 'transmissionMode: reference' by default + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.status_code == 204, "No contents expected for single reference result." 
+ assert results.body == b"" + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/output.json" + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + # FIXME: Should this be permitted? Technically, a multipart of 1 bounded contents is valid... + def test_execute_single_output_multipart_error(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + # NOTE: + # no 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header + # without 'Accept' using multipart, it is expected that JSON document is used + exec_headers = { + "Accept": ContentType.MULTIPART_MIXED, + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "mode": ExecuteMode.SYNC, # WARNING: force sync to make sure JSON job status is not returned instead + "inputs": { + "message": "test" + }, + "outputs": { + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 406, f"Expected error. Instead got: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.content_type == ContentType.APP_JSON, "Expect JSON instead of Multipart because of error." + assert resp.json["detail"] == "Multipart is not acceptable for single output results." 
+ assert resp.json["value"] == ContentType.MULTIPART_MIXED + assert resp.json["cause"] == { + "name": "Accept", + "in": "headers", + } + + @parameterized.expand([ + ContentType.MULTIPART_ANY, + ContentType.MULTIPART_MIXED, + ]) + def test_execute_multi_output_multipart_accept(self, multipart_header): + """ + Requesting ``multipart`` explicitly should return it instead of default :term:`JSON` ``document`` response. + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + # NOTE: + # no 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header + # without 'Accept' using multipart, it is expected that JSON document is used + exec_headers = { + "Accept": multipart_header, + "Content-Type": ContentType.APP_JSON, + "Prefer": "respond-async", + } + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + # no 'transmissionMode' to auto-resolve 'value' from 'return=representation' + # request multiple outputs, but not 'all', to test filter behavior at the same time + # use 1 expected as 'File' and 1 'string' literal to test conversion to raw 'value' + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = 
self.app.get(f"/jobs/{job_id}/results") + boundary = parse_kvp(results.content_type)["boundary"][0] + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.TEXT_PLAIN} + Content-ID: + + test + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: + Content-Location: {out_url}/{job_id}/output_json/output.json + --{boundary}-- + """) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) + assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_multi_output_prefer_header_return_representation(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + # no 'transmissionMode' to auto-resolve 'value' from 'return=representation' + # request multiple outputs, but not 'all', to test filter behavior at the same time + # use 1 expected as 'File' and 1 'string' literal to test conversion to raw 'value' + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 
'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + boundary = parse_kvp(results.content_type)["boundary"][0] + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.TEXT_PLAIN} + Content-ID: + + test + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: + + {output_json} + --{boundary}-- + """) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) + assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_multi_output_response_raw_value(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.RAW, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE}, + "output_data": {} # should use 'value' by default + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: 
[{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + boundary = parse_kvp(results.content_type)["boundary"][0] + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.TEXT_PLAIN} + Content-ID: + + test + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: + + {output_json} + --{boundary}-- + """) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) + assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_multi_output_response_raw_reference(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.RAW, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {}, # should use 'reference' by default + "output_data": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}, + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + 
data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # request status instead of results since not expecting 'document' JSON in this case + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + boundary = parse_kvp(results.content_type)["boundary"][0] + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.TEXT_PLAIN} + Content-ID: + Content-Location: {out_url}/{job_id}/output_data/output.txt + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: + Content-Location: {out_url}/{job_id}/output_json/output.json + --{boundary}-- + """) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) + assert results.text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs") - output_json = json.dumps({"data": "test"}, separators=(",", ":")) - assert results.text == output_json - assert outputs.json == { + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", "output_json": { "href": f"{out_url}/{job_id}/output_json/output.json", "type": ContentType.APP_JSON, }, } - def test_execute_single_output_prefer_header_return_minimal(self): - raise NotImplementedError # FIXME: implement - - def test_execute_single_output_response_raw_value(self): - raise NotImplementedError # FIXME: implement - - def test_execute_single_output_response_raw_reference(self): - raise NotImplementedError # FIXME: implement - - def test_execute_multi_output_prefer_header_return_representation(self): + def test_execute_multi_output_response_raw_mixed(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = 
self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + "Prefer": "respond-async" } exec_headers.update(self.json_headers) exec_content = { + "response": ExecuteResponse.RAW, "inputs": { "message": "test" }, "outputs": { - # no 'transmissionMode' to auto-resolve 'value' from 'return=representation' - # request multiple outputs, but not 'all', to test filter behavior at the same time - # use 1 expected as 'File' and 1 'string' literal to test conversion to raw 'value' - "output_json": {}, - "output_data": {} + "output_data": {}, # should use 'value' by default + "output_text": {}, # should use 'reference' by default + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE}, # force 'value' } } with contextlib.ExitStack() as stack: @@ -3613,57 +4215,303 @@ def test_execute_multi_output_prefer_header_return_representation(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - assert results.content_type.startswith(ContentType.MULTIPART_RELATED) boundary = parse_kvp(results.content_type)["boundary"][0] - outputs = self.app.get(f"/jobs/{job_id}/outputs") output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.TEXT_PLAIN} - Content-ID: output_data + Content-ID: test --{boundary} + Content-Type: {ContentType.TEXT_PLAIN} + Content-ID: + Content-Location: {out_url}/{job_id}/output_text/output.txt + --{boundary} Content-Type: {ContentType.APP_JSON} - Content-ID: output_json + Content-ID: {output_json} --{boundary}-- """) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { 
"output_data": "test", + "output_text": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/output.json", "type": ContentType.APP_JSON, }, } - def test_execute_multi_output_prefer_header_return_minimal(self): - raise NotImplementedError # FIXME: implement + def test_execute_multi_output_prefer_header_return_minimal_defaults(self): + """ + Test ``Prefer: return=minimal`` with default ``transmissionMode`` resolutions for literal/complex outputs. + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) - def test_execute_multi_output_response_raw_value(self): - raise NotImplementedError # FIXME: implement + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + # no 'transmissionMode' to auto-resolve 'value' based on literal/complex output + # request multiple outputs, but not 'all', to test filter behavior at the same time + # use 1 expected as 'File' and 1 'string' literal to test respective auto-resolution on their own + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" - def test_execute_multi_output_response_raw_reference(self): - raise NotImplementedError # FIXME: implement + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == 
Status.SUCCEEDED - def test_execute_multi_output_response_raw_mixed(self): - raise NotImplementedError # FIXME: implement + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_multi_output_prefer_header_return_minimal_override_transmission(self): + """ + Test ``Prefer: return=minimal`` with ``transmissionMode`` overrides. + + .. note:: + From a technical standpoint, this response will not really be "minimal" since the values are + embedded inline. However, this respects the *preference* vs *enforced* property requirements. 
+ """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + # force inline data for file instead of minimal link reference + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE}, + # force reference creation for literal data instead of minimal contents + "output_data": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}, + # auto-resolution for this file, to test that 'minimal' still applies with a link reference + "output_text": {}, + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_data": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, + "output_json": { + "value": output_json, + "type": ContentType.APP_JSON, + }, + "output_text": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, + } + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert 
outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + "output_text": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, + } def test_execute_multi_output_response_document_defaults(self): """ Test ``response: document`` with default ``transmissionMode`` resolutions for literal/complex outputs. """ - raise NotImplementedError # FIXME: implement + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + # no 'transmissionMode' to auto-resolve 'value' based on literal/complex output + # request multiple outputs, but not 'all', to test filter behavior at the same time + # use 1 expected as 'File' and 1 'string' literal to test respective auto-resolution on their own + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.APP_JSON) + 
assert results.json == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } def test_execute_multi_output_response_document_mixed(self): """ Test ``response: document`` with ``transmissionMode`` specified to force convertion of literal/complex outputs. """ - raise NotImplementedError # FIXME: implement + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": "respond-async" + } + exec_headers.update(self.json_headers) + exec_content = { + "response": ExecuteResponse.DOCUMENT, + "inputs": { + "message": "test" + }, + "outputs": { + # force inline data for file instead of minimal link reference + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE}, + # force reference creation for literal data instead of minimal contents + "output_data": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}, + # auto-resolution for this file, to test that 'minimal' still applies with a link reference + "output_text": {}, + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" - # FIXME: implement other variations as well... 
see doc 'Execution Results' combinations + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_data": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, + "output_json": { + "value": output_json, + "type": ContentType.APP_JSON, + }, + "output_text": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, + } + outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + "output_text": { + "href": f"{out_url}/{job_id}/output_text/output.txt", + "type": ContentType.TEXT_PLAIN, + }, + } @pytest.mark.functional diff --git a/weaver/formats.py b/weaver/formats.py index 0014bb823..84025e39c 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -116,8 +116,10 @@ class ContentType(Constants): IMAGE_GIF = "image/gif" IMAGE_PNG = "image/png" IMAGE_TIFF = "image/tiff" - MULTIPART_FORM = "multipart/form-data" - MULTIPART_RELATED = "multipart/related" + MULTIPART_ANY = "multipart/*" + MULTIPART_FORM = "multipart/form-data" # data/file upload + MULTIPART_MIXED = "multipart/mixed" # content of various types + MULTIPART_RELATED = "multipart/related" # content that contain cross-references with Content-ID (CID) TEXT_ENRICHED = "text/enriched" TEXT_HTML = "text/html" TEXT_PLAIN = "text/plain" From b25385c90dd1d85e2dba903db418eb4a03fdeb54 Mon Sep 17 00:00:00 2001 From: Francis Charette 
Migneault Date: Thu, 19 Sep 2024 15:04:10 -0400 Subject: [PATCH 17/75] [wip] job results representation negotiation --- docs/source/processes.rst | 4 + tests/functional/test_wps_package.py | 36 ++++-- tests/functional/utils.py | 12 +- tests/test_execute.py | 92 ++++++++++++++++ tests/test_utils.py | 89 --------------- weaver/execute.py | 128 +++++++++++++++++++++- weaver/processes/execution.py | 3 +- weaver/typedefs.py | 10 ++ weaver/utils.py | 107 ------------------ weaver/wps_restapi/jobs/utils.py | 76 ++++++++++--- weaver/wps_restapi/quotation/quotes.py | 4 +- weaver/wps_restapi/swagger_definitions.py | 21 +++- 12 files changed, 351 insertions(+), 231 deletions(-) create mode 100644 tests/test_execute.py diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 7e668bf02..9602330c8 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -2007,6 +2007,10 @@ format is employed according to the chosen location. .. seealso:: For the :term:`WPS` endpoint, refer to :ref:`conf_settings`. +.. _proc_op_job_status: +.. fixme: add example +.. fixme: describe minimum fields and extra fields + .. 
_proc_op_result: Obtaining Job Outputs, Results, Logs or Errors diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 7b2fdb9e5..c5f4b52e3 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -14,7 +14,6 @@ import json import logging import os -import re import shutil import tempfile from inspect import cleandoc @@ -124,10 +123,6 @@ def setUpClass(cls) -> None: def setUp(self) -> None: self.process_store.clear_processes() - @classmethod - def request(cls, method, url, *args, **kwargs): - raise NotImplementedError # not used - def test_deploy_cwl_label_as_process_title(self): title = "This process title comes from the CWL label" cwl = { @@ -3521,6 +3516,31 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_ assert results + +@pytest.mark.functional +class WpsPackageAppTestResultResponses(WpsConfigBase, ResourcesUtil): + """ + Tests to evaluate the various combinations of results response representations. + + .. 
seealso:: + - :ref:`proc_exec_results` + - :ref:`proc_op_job_results` + """ + @classmethod + def setUpClass(cls) -> None: + cls.settings = { + "weaver.wps": True, + "weaver.wps_path": "/ows/wps", + "weaver.wps_restapi_path": "/", + "weaver.wps_output_path": "/wpsoutputs", + "weaver.wps_output_url": "http://localhost/wpsoutputs", + "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs", # nosec: B108 # don't care hardcoded for test + } + super(WpsPackageAppTestResultResponses, cls).setUpClass() + + def setUp(self) -> None: + self.process_store.clear_processes() + def test_execute_single_output_prefer_header_return_representation_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3781,7 +3801,7 @@ def test_execute_single_output_response_raw_value_complex(self): assert outputs.json["outputs"] == { "output_json": { "value": output_json, - "type": ContentType.APP_JSON, + "mediaType": ContentType.APP_JSON, }, } @@ -4362,7 +4382,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission }, "output_json": { "value": output_json, - "type": ContentType.APP_JSON, + "mediaType": ContentType.APP_JSON, }, "output_text": { "href": f"{out_url}/{job_id}/output_text/output.txt", @@ -4492,7 +4512,7 @@ def test_execute_multi_output_response_document_mixed(self): }, "output_json": { "value": output_json, - "type": ContentType.APP_JSON, + "mediaType": ContentType.APP_JSON, }, "output_text": { "href": f"{out_url}/{job_id}/output_text/output.txt", diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 6f14739df..dbdfe31bb 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -48,6 +48,7 @@ AnyUUID, CWL, ExecutionResults, + JobStatusResponse, JSON, ProcessDeployment, ProcessDescription, @@ -433,7 +434,7 @@ def deploy_process(cls, return info # type: ignore def _try_get_logs(self, status_url): - _resp = self.app.get(f"{status_url}/logs", headers=self.json_headers) + _resp = 
self.app.get(f"{status_url}/logs", headers=dict(self.json_headers)) if _resp.status_code == 200: _text = "\n".join(_resp.json) return f"Error logs:\n{_text}" @@ -445,6 +446,11 @@ def fully_qualified_test_process_name(self, name=""): test_name = f"{class_name}.{self._testMethodName}{extra_name}".replace(".", "-") return test_name + @overload + def monitor_job(self, status_url, return_status=False, **__): + # type: (str, Literal[True], **Any) -> JobStatusResponse + ... + def monitor_job(self, status_url, # type: str timeout=None, # type: Optional[int] @@ -452,7 +458,7 @@ def monitor_job(self, return_status=False, # type: bool wait_for_status=None, # type: Optional[str] expect_failed=False, # type: bool - ): # type: (...) -> ExecutionResults + ): # type: (...) -> Union[ExecutionResults, JobStatusResponse] """ Job polling of status URL until completion or timeout. @@ -505,7 +511,7 @@ def check_job_status(_resp, running=False): return resp.json def get_outputs(self, status_url): - resp = self.app.get(f"{status_url}/outputs", headers=self.json_headers) + resp = self.app.get(f"{status_url}/outputs", headers=dict(self.json_headers)) body = resp.json pretty = json.dumps(body, indent=2, ensure_ascii=False) assert resp.status_code == 200, f"Get outputs failed:\n{pretty}\n{self._try_get_logs(status_url)}" diff --git a/tests/test_execute.py b/tests/test_execute.py new file mode 100644 index 000000000..4cace41a0 --- /dev/null +++ b/tests/test_execute.py @@ -0,0 +1,92 @@ +import itertools + +import pytest +from pyramid.httpexceptions import HTTPBadRequest + +from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteReturnPreference, parse_prefer_header_execute_mode + + +@pytest.mark.parametrize( + ["headers", "support", "expected", "extra_prefer"], + [ + # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) + ({}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.SYNC, 10, {}), ""), + # only 
supported async (enforced) - original behaviour + ({}, [ExecuteControlOption.ASYNC], (ExecuteMode.ASYNC, None, {}), ""), + ] + + [ + (_headers, _support, _expected, _extra) + for (_headers, _support, _expected), _extra + in itertools.product( + [ + # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) + ({"Prefer": ""}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.SYNC, 10, {})), + ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.SYNC, 4, {"Preference-Applied": "wait=4"})), + ({"Prefer": "wait=20"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], + (ExecuteMode.ASYNC, None, {})), # larger than max time + # only supported async (enforced) - original behaviour + ({"Prefer": ""}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {})), + ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC], + (ExecuteMode.ASYNC, None, {})), + + ], + [ + "", + f"return={ExecuteReturnPreference.MINIMAL}", + f"return={ExecuteReturnPreference.REPRESENTATION}" + # FIXME: + # Support with added ``Prefer: handling=strict`` or ``Prefer: handling=lenient`` + # https://github.com/crim-ca/weaver/issues/701 + ] + ) + ] +) +def test_prefer_header_execute_mode(headers, support, expected, extra_prefer): + if extra_prefer and "Prefer" in headers: + headers["Prefer"] += f", 
{extra_prefer}" if headers["Prefer"] else extra_prefer + result = parse_prefer_header_execute_mode(headers, support) + assert result == expected + + +@pytest.mark.parametrize( + ["headers", "expected"], + [ + # 1st variant is considered as 1 Prefer header with all values supplied simultaneously + # 2nd variant is considered as 2 Prefer headers, each with their respective value + # (this is because urllib, under the hood, concatenates the list of header-values using ';' separator) + ({"Prefer": "respond-async, wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ({"Prefer": "respond-async; wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), + ] +) +def test_parse_prefer_header_execute_mode_flexible(headers, expected): + """ + Ensure that the ``Prefer`` header supplied multiple times (allowed by :rfc:`7240`) is handled correctly. + """ + result = parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC]) + assert result == expected + + +@pytest.mark.parametrize("prefer_header", [ + "wait=10s", + "wait=3.1416", + "wait=yes", + "wait=1,2,3", # technically, gets parsed as 'wait=1' (valid) and other '2', '3' parameters on their own + "wait=1;2;3", + "wait=1, wait=2", + "wait=1; wait=2", +]) +def test_parse_prefer_header_execute_mode_invalid(prefer_header): + headers = {"Prefer": prefer_header} + with pytest.raises(HTTPBadRequest): + parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 6d7f1cfd9..d046f9481 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -23,7 +23,6 @@ from beaker.cache import cache_region from mypy_boto3_s3.literals import RegionName from pyramid.httpexceptions import ( - HTTPBadRequest, HTTPConflict, HTTPCreated, HTTPError as PyramidHTTPError, @@ -48,7 +47,6 @@ setup_test_file_hierarchy ) from weaver import xml_util -from weaver.execute import ExecuteControlOption, 
ExecuteMode, ExecuteReturnPreference from weaver.formats import ContentEncoding, ContentType, repr_json from weaver.status import JOB_STATUS_CATEGORIES, STATUS_PYWPS_IDS, STATUS_PYWPS_MAP, Status, StatusCompliant, map_status from weaver.utils import ( @@ -83,7 +81,6 @@ null, parse_kvp, parse_number_with_unit, - parse_prefer_header_execute_mode, pass_http_error, request_extra, resolve_s3_from_http, @@ -2122,92 +2119,6 @@ def test_parse_kvp(query, params, expected): assert result == expected -@pytest.mark.parametrize( - ["headers", "support", "expected", "extra_prefer"], - [ - # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) - ({}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.SYNC, 10, {}), ""), - # only supported async (enforced) - original behaviour - ({}, [ExecuteControlOption.ASYNC], (ExecuteMode.ASYNC, None, {}), ""), - ] + - [ - (_headers, _support, _expected, _extra) - for (_headers, _support, _expected), _extra - in itertools.product( - [ - # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) - ({"Prefer": ""}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.SYNC, 10, {})), - ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.SYNC, 4, {"Preference-Applied": "wait=4"})), - ({"Prefer": "wait=20"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], - (ExecuteMode.ASYNC, None, {})), # larger than max time - # only supported async (enforced) - original behaviour - ({"Prefer": ""}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, 
{})), - ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC], - (ExecuteMode.ASYNC, None, {})), - - ], - [ - "", - f"return={ExecuteReturnPreference.MINIMAL}", - f"return={ExecuteReturnPreference.REPRESENTATION}" - # FIXME: - # Support with added ``Prefer: handling=strict`` or ``Prefer: handling=lenient`` - # https://github.com/crim-ca/weaver/issues/701 - ] - ) - ] -) -def test_prefer_header_execute_mode(headers, support, expected, extra_prefer): - if extra_prefer and "Prefer" in headers: - headers["Prefer"] += f", {extra_prefer}" if headers["Prefer"] else extra_prefer - result = parse_prefer_header_execute_mode(headers, support) - assert result == expected - - -@pytest.mark.parametrize( - ["headers", "expected"], - [ - # 1st variant is considered as 1 Prefer header with all values supplied simultaneously - # 2nd variant is considered as 2 Prefer headers, each with their respective value - # (this is because urllib, under the hood, concatenates the list of header-values using ';' separator) - ({"Prefer": "respond-async, wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "respond-async; wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ] -) -def test_parse_prefer_header_execute_mode_flexible(headers, expected): - """ - Ensure that the ``Prefer`` header supplied multiple times (allowed by :rfc:`7240`) is handled correctly. 
- """ - result = parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC]) - assert result == expected - - -@pytest.mark.parametrize("prefer_header", [ - "wait=10s", - "wait=3.1416", - "wait=yes", - "wait=1,2,3", # technically, gets parsed as 'wait=1' (valid) and other '2', '3' parameters on their own - "wait=1;2;3", - "wait=1, wait=2", - "wait=1; wait=2", -]) -def test_parse_prefer_header_execute_mode_invalid(prefer_header): - headers = {"Prefer": prefer_header} - with pytest.raises(HTTPBadRequest): - parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC]) - - @pytest.mark.parametrize(["number", "binary", "unit", "expect"], [ (1.234, False, "B", "1.234 B"), (10_000_000, False, "B", "10.000 MB"), diff --git a/weaver/execute.py b/weaver/execute.py index dfb7df14b..4a1e427be 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -1,9 +1,17 @@ +import logging from typing import TYPE_CHECKING +from pyramid.httpexceptions import HTTPBadRequest + from weaver.base import Constants +from weaver.utils import get_header, parse_kvp if TYPE_CHECKING: - from typing import List + from typing import List, Optional, Tuple + + from weaver.typedefs import AnyHeadersContainer, HeadersType + +LOGGER = logging.getLogger(__name__) class ExecuteMode(Constants): @@ -74,3 +82,121 @@ class ExecuteCollectionFormat(Constants): ExecuteCollectionFormat.OGC_MAP, ExecuteCollectionFormat.GEOJSON, ] + + +def parse_prefer_header_return(headers): + # type: (AnyHeadersContainer) -> Optional[ExecuteReturnPreference] + """ + Get the return preference if specified. 
+ """ + prefer_header = get_header("prefer", headers) + prefer_params = parse_kvp(prefer_header) + prefer_return = prefer_params.get("return") + if prefer_return: + return ExecuteReturnPreference.get(prefer_return[0]) + + +def parse_prefer_header_execute_mode( + header_container, # type: AnyHeadersContainer + supported_modes=None, # type: Optional[List[AnyExecuteControlOption]] + wait_max=10, # type: int +): # type: (...) -> Tuple[AnyExecuteMode, Optional[int], HeadersType] + """ + Obtain execution preference if provided in request headers. + + .. seealso:: + - :term:`OGC API - Processes`: Core, Execution mode < + https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execution_mode>`_. + This defines all conditions how to handle ``Prefer`` against applicable :term:`Process` description. + - :rfc:`7240#section-4.1` HTTP Prefer header ``respond-async`` + + .. seealso:: + If ``Prefer`` format is valid, but server decides it cannot be respected, it can be transparently ignored + (:rfc:`7240#section-2`). The server must respond with ``Preference-Applied`` indicating preserved preferences + it decided to respect. + + :param header_container: Request headers to retrieve preference, if any available. + :param supported_modes: + Execute modes that are permitted for the operation that received the ``Prefer`` header. + Resolved mode will respect this constraint following specification requirements of :term:`OGC API - Processes`. + :param wait_max: + Maximum wait time enforced by the server. If requested wait time is greater, ``wait`` preference will not be + applied and will fall back to asynchronous response. + :return: + Tuple of resolved execution mode, wait time if specified, and header of applied preferences if possible. + Maximum wait time indicates duration until synchronous response should fall back to asynchronous response. + :raises HTTPBadRequest: If contents of ``Prefer`` are not valid. 
+ """ + + prefer = get_header("prefer", header_container) + relevant_modes = {ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC} + supported_modes = list(set(supported_modes or []).intersection(relevant_modes)) + + if not prefer: + # /req/core/process-execute-default-execution-mode (A & B) + if not supported_modes: + return ExecuteMode.ASYNC, None, {} # Weaver's default + if len(supported_modes) == 1: + mode = ExecuteMode.ASYNC if supported_modes[0] == ExecuteControlOption.ASYNC else ExecuteMode.SYNC + wait = None if mode == ExecuteMode.ASYNC else wait_max + return mode, wait, {} + # /req/core/process-execute-default-execution-mode (C) + return ExecuteMode.SYNC, wait_max, {} + + params = parse_kvp(prefer, pair_sep=",", multi_value_sep=None) + wait = wait_max + if "wait" in params: + try: + if any(param.isnumeric() for param in params): + # 'wait=x,y,z' parsed as 'wait=x' and 'y' / 'z' parameters on their own + # since 'wait' is the only referenced that users integers, it is guaranteed to be a misuse + raise ValueError("Invalid 'wait' with comma-separated values.") + if not len(params["wait"]) == 1: + raise ValueError("Too many values.") + wait = params["wait"][0] + if not str.isnumeric(wait) or "." 
in wait or wait.startswith("-"): + raise ValueError("Invalid integer for 'wait' in seconds.") + wait = int(wait) + except (TypeError, ValueError) as exc: + raise HTTPBadRequest(json={ + "code": "InvalidParameterValue", + "description": "HTTP Prefer header contains invalid 'wait' definition.", + "error": type(exc).__name__, + "cause": str(exc), + "value": str(params["wait"]), + }) + + if wait > wait_max: + LOGGER.info("Requested Prefer wait header too large (%ss > %ss), revert to async execution.", wait, wait_max) + return ExecuteMode.ASYNC, None, {} + + auto = ExecuteMode.ASYNC if "respond-async" in params else ExecuteMode.SYNC + applied_preferences = [] + # /req/core/process-execute-auto-execution-mode (A & B) + if len(supported_modes) == 1: + # supported mode is enforced, only indicate if it matches preferences to honour them + # otherwise, server is allowed to discard preference since it cannot be honoured + mode = ExecuteMode.ASYNC if supported_modes[0] == ExecuteControlOption.ASYNC else ExecuteMode.SYNC + wait = None if mode == ExecuteMode.ASYNC else wait_max + if auto == mode: + if auto == ExecuteMode.ASYNC: + applied_preferences.append("respond-async") + if wait and "wait" in params: + applied_preferences.append(f"wait={wait}") + # /rec/core/process-execute-honor-prefer (A: async & B: wait) + # https://datatracker.ietf.org/doc/html/rfc7240#section-3 + applied = {} + if applied_preferences: + applied = {"Preference-Applied": ", ".join(applied_preferences)} + return mode, wait, applied + + # Weaver's default, at server's discretion when both mode are supported + # /req/core/process-execute-auto-execution-mode (C) + if len(supported_modes) == 2: + if auto == ExecuteMode.ASYNC: + return ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"} + if wait and "wait" in params: + return ExecuteMode.SYNC, wait, {"Preference-Applied": f"wait={wait}"} + if wait: # default used, not a supplied preference + return ExecuteMode.SYNC, wait, {} + return 
ExecuteMode.ASYNC, None, {} diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index e63f9bd20..ade7181d1 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -15,7 +15,7 @@ from weaver.database import get_db from weaver.datatype import Process, Service -from weaver.execute import ExecuteControlOption, ExecuteMode +from weaver.execute import ExecuteControlOption, ExecuteMode, parse_prefer_header_execute_mode from weaver.formats import AcceptLanguage, ContentType, clean_media_type_format, map_cwl_media_type, repr_json from weaver.notify import map_job_subscribers, notify_job_subscribers from weaver.owsexceptions import OWSInvalidParameterValue, OWSNoApplicableCode @@ -43,7 +43,6 @@ now, parse_kvp, parse_number_with_unit, - parse_prefer_header_execute_mode, raise_on_xml_exception, wait_secs ) diff --git a/weaver/typedefs.py b/weaver/typedefs.py index ae77096d5..4ad1469b6 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -523,6 +523,16 @@ class CWL_SchemaName(Protocol): "successEmail": NotRequired[str], "inProgressEmail": NotRequired[str], }, total=True) + JobStatusResponse = TypedDict("JobStatusResponse", { + "status": Required[AnyStatusType], + "type": Required[Literal["process", "provider"]], + "id": NotRequired[str], # TBD alternative to 'jobID' considered by SWG + "jobID": Required[str], + "processID": Required[str], + "providerID": NotRequired[Optional[str]], + "links": NotRequired[List[Link]], + # many other fields... 
only listing accessed ones in code + }, total=False) # when schema='weaver.processes.constants.ProcessSchema.OGC' ExecutionInputsMap = Dict[str, Union[AnyValueType, JobValueObject, List[JobValueObject]]] diff --git a/weaver/utils.py b/weaver/utils.py index 246d69a75..d6d39086d 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -65,7 +65,6 @@ from weaver.base import Constants, ExtendedEnum from weaver.compat import Version from weaver.exceptions import WeaverException -from weaver.execute import ExecuteControlOption, ExecuteMode from weaver.formats import ContentType, get_content_type, get_extension, get_format, repr_json from weaver.status import map_status from weaver.warning import TimeZoneInfoAlreadySetWarning, UndefinedContainerWarning @@ -800,112 +799,6 @@ def parse_kvp(query, # type: str return kvp -def parse_prefer_header_execute_mode( - header_container, # type: AnyHeadersContainer - supported_modes=None, # type: Optional[List[AnyExecuteControlOption]] - wait_max=10, # type: int -): # type: (...) -> Tuple[AnyExecuteMode, Optional[int], HeadersType] - """ - Obtain execution preference if provided in request headers. - - .. seealso:: - - :term:`OGC API - Processes`: Core, Execution mode < - https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execution_mode>`_. - This defines all conditions how to handle ``Prefer`` against applicable :term:`Process` description. - - :rfc:`7240#section-4.1` HTTP Prefer header ``respond-async`` - - .. seealso:: - If ``Prefer`` format is valid, but server decides it cannot be respected, it can be transparently ignored - (:rfc:`7240#section-2`). The server must respond with ``Preference-Applied`` indicating preserved preferences - it decided to respect. - - :param header_container: Request headers to retrieve preference, if any available. - :param supported_modes: - Execute modes that are permitted for the operation that received the ``Prefer`` header. 
- Resolved mode will respect this constraint following specification requirements of :term:`OGC API - Processes`. - :param wait_max: - Maximum wait time enforced by the server. If requested wait time is greater, ``wait`` preference will not be - applied and will fall back to asynchronous response. - :return: - Tuple of resolved execution mode, wait time if specified, and header of applied preferences if possible. - Maximum wait time indicates duration until synchronous response should fall back to asynchronous response. - :raises HTTPBadRequest: If contents of ``Prefer`` are not valid. - """ - - prefer = get_header("prefer", header_container) - relevant_modes = {ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC} - supported_modes = list(set(supported_modes or []).intersection(relevant_modes)) - - if not prefer: - # /req/core/process-execute-default-execution-mode (A & B) - if not supported_modes: - return ExecuteMode.ASYNC, None, {} # Weaver's default - if len(supported_modes) == 1: - mode = ExecuteMode.ASYNC if supported_modes[0] == ExecuteControlOption.ASYNC else ExecuteMode.SYNC - wait = None if mode == ExecuteMode.ASYNC else wait_max - return mode, wait, {} - # /req/core/process-execute-default-execution-mode (C) - return ExecuteMode.SYNC, wait_max, {} - - params = parse_kvp(prefer, pair_sep=",", multi_value_sep=None) - wait = wait_max - if "wait" in params: - try: - if any(param.isnumeric() for param in params): - # 'wait=x,y,z' parsed as 'wait=x' and 'y' / 'z' parameters on their own - # since 'wait' is the only referenced that users integers, it is guaranteed to be a misuse - raise ValueError("Invalid 'wait' with comma-separated values.") - if not len(params["wait"]) == 1: - raise ValueError("Too many values.") - wait = params["wait"][0] - if not str.isnumeric(wait) or "." 
in wait or wait.startswith("-"): - raise ValueError("Invalid integer for 'wait' in seconds.") - wait = int(wait) - except (TypeError, ValueError) as exc: - raise HTTPBadRequest(json={ - "code": "InvalidParameterValue", - "description": "HTTP Prefer header contains invalid 'wait' definition.", - "error": type(exc).__name__, - "cause": str(exc), - "value": str(params["wait"]), - }) - - if wait > wait_max: - LOGGER.info("Requested Prefer wait header too large (%ss > %ss), revert to async execution.", wait, wait_max) - return ExecuteMode.ASYNC, None, {} - - auto = ExecuteMode.ASYNC if "respond-async" in params else ExecuteMode.SYNC - applied_preferences = [] - # /req/core/process-execute-auto-execution-mode (A & B) - if len(supported_modes) == 1: - # supported mode is enforced, only indicate if it matches preferences to honour them - # otherwise, server is allowed to discard preference since it cannot be honoured - mode = ExecuteMode.ASYNC if supported_modes[0] == ExecuteControlOption.ASYNC else ExecuteMode.SYNC - wait = None if mode == ExecuteMode.ASYNC else wait_max - if auto == mode: - if auto == ExecuteMode.ASYNC: - applied_preferences.append("respond-async") - if wait and "wait" in params: - applied_preferences.append(f"wait={wait}") - # /rec/core/process-execute-honor-prefer (A: async & B: wait) - # https://datatracker.ietf.org/doc/html/rfc7240#section-3 - applied = {} - if applied_preferences: - applied = {"Preference-Applied": ", ".join(applied_preferences)} - return mode, wait, applied - - # Weaver's default, at server's discretion when both mode are supported - # /req/core/process-execute-auto-execution-mode (C) - if len(supported_modes) == 2: - if auto == ExecuteMode.ASYNC: - return ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"} - if wait and "wait" in params: - return ExecuteMode.SYNC, wait, {"Preference-Applied": f"wait={wait}"} - if wait: # default used, not a supplied preference - return ExecuteMode.SYNC, wait, {} - return 
ExecuteMode.ASYNC, None, {} - - def get_url_without_query(url): # type: (Union[str, ParseResult]) -> str """ diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 2f9c4e321..e9a9f4f33 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -31,7 +31,7 @@ ServiceNotAccessible, ServiceNotFound ) -from weaver.execute import ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteResponse, ExecuteTransmissionMode, parse_prefer_header_return, ExecuteReturnPreference from weaver.formats import ContentType, get_format, repr_json from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema @@ -57,7 +57,7 @@ from weaver.wps_restapi.providers.utils import forbid_local_only if TYPE_CHECKING: - from typing import Dict, List, Optional, Tuple, Union + from typing import Any, Dict, List, Optional, Tuple, Union from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( @@ -441,24 +441,69 @@ def get_results( # pylint: disable=R1260 return outputs, headers -def get_job_results_response(job, container, headers=None): - # type: (Job, AnySettingsContainer, Optional[AnyHeadersContainer]) -> AnyResponseType +def get_job_return(job, body=None, headers=None): + # type: (Job, Optional[JSON], Optional[AnyHeadersContainer]) -> ExecuteResponse + """ + Obtain the :term:`Job` result representation based on the resolution order of preferences and request parameters. 
+ """ + body = body or {} + resp = ExecuteResponse.get(body.get("response")) + if resp: + return resp + + pref = parse_prefer_header_return(headers) + if pref == ExecuteReturnPreference.MINIMAL: + return ExecuteResponse.DOCUMENT + if pref == ExecuteReturnPreference.REPRESENTATION: + return ExecuteResponse.RAW + + return job.execution_response + + +def get_job_results_response( + job, # type: Job + container, # type: AnySettingsContainer + *, # type: Any + headers=None, # type: Optional[AnyHeadersContainer] + results_headers=None, # type: Optional[AnyHeadersContainer] + results_contents=None, # type: Optional[JSON] +): # type: (...) -> AnyResponseType """ Generates the :term:`OGC` compliant :term:`Job` results response according to submitted execution parameters. Parameters that impact the format of the response are: - - Amount of outputs to be returned. - - Parameter ``response: raw|document`` - - Parameter ``transmissionMode: value|reference`` per output if ``response: raw``. + - Body parameter ``outputs`` with the amount of *requested outputs* to be returned. + - Body parameter ``response: raw|document`` for content representation. + - Body parameter ``transmissionMode: value|reference`` per output. + - Header parameter ``Prefer: return=representation|minimal`` for content representation. + - Overrides, for any of the previous parameters, allowing request of an alternate representation. + + Resolution order/priority: + + 1. :paramref:`override_contents` + 2. :paramref:`override_headers` + 3. :paramref:`job` definitions + + The logic of the resolution order is that any body parameters resolving to an equivalent information provided + by header parameters will be more important, since ``Prefer`` are *soft* requirements, whereas body parameters + are *hard* requirements. The parameters stored in the :paramref:`job` are defined during :term:`Job` submission, + which become the "default" results representation if requested as is. 
If further parameters are provided to + override during the results request, they modify the "default" results representation. In this case, a header + provided in the results request overrides the body parameters from the original :term:`Job`, since their results + request context is "closer" than the ones at the time of the :term:`Job` submission. .. seealso:: More details available for each combination: - https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 + - :ref:`proc_op_job_results` + - :ref:`proc_exec_results` - :param job: Job for which to generate the results response. + :param job: Job for which to generate the results response, which contains the originally submitted parameters. :param container: Application settings. :param headers: Additional headers to provide in the response. + :param results_headers: Headers that override originally submitted job parameters when requesting results. + :param results_contents: Body contents that override originally submitted job parameters when requesting results.
""" raise_job_dismissed(job, container) raise_job_bad_status(job, container) @@ -467,7 +512,7 @@ def get_job_results_response(job, container, headers=None): # See: # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document - is_raw = job.execution_response == ExecuteResponse.RAW + is_raw = get_job_return(job, results_contents, results_headers) == ExecuteResponse.RAW results, refs = get_results(job, container, value_key="value", schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details link_references=is_raw) @@ -477,7 +522,7 @@ def get_job_results_response(job, container, headers=None): if not is_raw: try: - results_schema = sd.Result() + results_schema = sd.ResultsDocument() results_json = results_schema.deserialize(results) if len(results_json) != len(results): # pragma: no cover # ensure no outputs silently dismissed raise colander.Invalid( @@ -547,18 +592,21 @@ def get_job_results_response(job, container, headers=None): def get_job_submission_response(body, headers, error=False): - # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated] + # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated, HTTPBadRequest] """ - Generates the successful response from contents returned by :term:`Job` submission process. + Generates the response contents returned by :term:`Job` submission process. If :term:`Job` already finished processing within requested ``Prefer: wait=X`` seconds delay (and if allowed by the :term:`Process` ``jobControlOptions``), return the successful status immediately instead of created status. + If the status is not successful, return the failed :term:`Job` status response. + Otherwise, return the status monitoring location of the created :term:`Job` to be monitored asynchronously. .. 
seealso:: - :func:`weaver.processes.execution.submit_job` - :func:`weaver.processes.execution.submit_job_handler` + - :func:`weaver.processes.execution.submit_job` + - :func:`weaver.processes.execution.submit_job_handler` + - :ref:`proc_op_job_status` """ # convert headers to pass as list to avoid any duplicate Content-related headers # otherwise auto-added by JSON handling when provided by dict-like structure diff --git a/weaver/wps_restapi/quotation/quotes.py b/weaver/wps_restapi/quotation/quotes.py index 49fd4c828..8cc0c3165 100644 --- a/weaver/wps_restapi/quotation/quotes.py +++ b/weaver/wps_restapi/quotation/quotes.py @@ -10,7 +10,7 @@ from weaver.database import get_db from weaver.datatype import Bill, Quote from weaver.exceptions import log_unhandled_exceptions -from weaver.execute import ExecuteMode +from weaver.execute import ExecuteMode, parse_prefer_header_execute_mode from weaver.formats import ContentType, OutputFormat from weaver.owsexceptions import OWSInvalidParameterValue from weaver.processes.execution import validate_process_io @@ -24,7 +24,7 @@ ) from weaver.sort import Sort from weaver.store.base import StoreBills, StoreProcesses, StoreQuotes -from weaver.utils import as_int, get_header, get_settings, parse_prefer_header_execute_mode +from weaver.utils import as_int, get_header, get_settings from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.processes.processes import submit_local_job from weaver.wps_restapi.quotation.utils import get_quote diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 11b46305d..29816937e 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -5924,10 +5924,8 @@ class ResultData(OneOfKeywordSchema): ] -class Result(ExtendedMappingSchema): - """ - Result outputs obtained from a successful process job execution. 
- """ +class ResultsDocument(ExtendedMappingSchema): + description = "Results representation as JSON document." _schema = f"{OGC_API_PROC_PART1_SCHEMAS}/results.yaml" output_id = ResultData( variable="{output-id}", title="ResultData", @@ -5938,6 +5936,19 @@ class Result(ExtendedMappingSchema): ) +class ResultsContent(ExtendedSchemaNode): + description = "Results representation as literal contents." + schema_type = String() + + +class ResultsBody(OneOfKeywordSchema): + description = "Results obtained from a successful process job execution." + _one_of = [ + ResultsDocument(), + ResultsContent(), + ] + + class JobInputsBody(ExecuteInputOutputs): links = LinkList(missing=drop) @@ -7240,7 +7251,7 @@ class RedirectResultResponse(ExtendedMappingSchema): class OkGetJobResultsResponse(ExtendedMappingSchema): _schema = f"{OGC_API_PROC_PART1_RESPONSES}/Results.yaml" header = ResponseHeaders() - body = Result() + body = ResultsBody() class NoContentJobResultsHeaders(NoContent): From 46cccee4fd3465191938847a98d8d0527736b6c6 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 19 Sep 2024 22:00:20 -0400 Subject: [PATCH 18/75] [wip] more job result tests + validate accept header on job submit + planned test for result by ID with accept (relates to #18) --- tests/functional/test_celery.py | 14 +- tests/functional/test_cli.py | 7 +- tests/functional/test_wps_package.py | 363 +++++++++++++++++++--- tests/wps_restapi/test_jobs.py | 6 +- weaver/datatype.py | 12 + weaver/processes/execution.py | 57 +++- weaver/store/base.py | 1 + weaver/store/mongodb.py | 2 + weaver/wps_restapi/processes/processes.py | 16 +- weaver/wps_restapi/swagger_definitions.py | 7 +- 10 files changed, 414 insertions(+), 71 deletions(-) diff --git a/tests/functional/test_celery.py b/tests/functional/test_celery.py index 92ae3261f..ad810f155 100644 --- a/tests/functional/test_celery.py +++ b/tests/functional/test_celery.py @@ -50,16 +50,10 @@ def test_celery_registry_resolution(): settings = 
get_settings_from_testapp(webapp) wps_url = get_wps_url(settings) job_store = get_db(settings).get_store("jobs") - job1 = job_store.save_job( - task_id="tmp", - process="jsonarray2netcdf", - inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}, - ) - job2 = job_store.save_job( - task_id="tmp", - process="jsonarray2netcdf", - inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}, - ) + job1 = job_store.save_job(task_id="tmp", process="jsonarray2netcdf", + inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}) + job2 = job_store.save_job(task_id="tmp", process="jsonarray2netcdf", + inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}) with contextlib.ExitStack() as stack: celery_mongo_broker = f"""mongodb://{settings["mongodb.host"]}:{settings["mongodb.port"]}/celery-test""" diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index a34341626..5fb99a7be 100644 --- a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -770,8 +770,8 @@ def test_jobs_search_multi_status(self): class TestWeaverCLI(TestWeaverClientBase): def setUp(self): super(TestWeaverCLI, self).setUp() - job = self.job_store.save_job(task_id="12345678-1111-2222-3333-111122223333", - process="fake-process", access=Visibility.PUBLIC) + job = self.job_store.save_job(task_id="12345678-1111-2222-3333-111122223333", process="fake-process", + access=Visibility.PUBLIC) job.status = Status.SUCCEEDED self.test_job = self.job_store.update_job(job) @@ -2001,8 +2001,7 @@ def test_jobs_filter_process_provider(self): svc = self.service_store.save_service(Service(name="random", url="https://random.com", public=True)) proc = self.test_process["Echo"] job1 = self.job_store.save_job(task_id=uuid.uuid4(), process=proc, access=Visibility.PUBLIC) - job2 = self.job_store.save_job(task_id=uuid.uuid4(), process=proc, service=svc.name, - access=Visibility.PUBLIC) + job2 = self.job_store.save_job(task_id=uuid.uuid4(), process=proc, 
service=svc.name, access=Visibility.PUBLIC) self.job_store.save_job(task_id=uuid.uuid4(), process="CatFile", access=Visibility.PUBLIC) lines = mocked_sub_requests( diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index c5f4b52e3..8c82464f9 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -69,11 +69,12 @@ CWL_REQUIREMENT_INLINE_JAVASCRIPT, CWL_REQUIREMENT_RESOURCE, CWL_REQUIREMENT_SECRETS, + JobInputsOutputsSchema, ProcessSchema ) from weaver.processes.types import ProcessType from weaver.status import Status -from weaver.utils import fetch_file, get_any_value, get_path_kvp, load_file, parse_kvp +from weaver.utils import fetch_file, get_any_value, get_header, get_path_kvp, is_uuid, load_file, parse_kvp from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location from weaver.wps_restapi import swagger_definitions as sd @@ -3540,6 +3541,22 @@ def setUpClass(cls) -> None: def setUp(self) -> None: self.process_store.clear_processes() + + @staticmethod + def remove_result_format(results): + """ + Remove the results ``format`` property to simplify test comparisons. + + For backward compatibility, the ``format`` property is inserted in result definitions when represented + as :term:`JSON`, on top of the :term:`OGC` compliant ``type``, ``mediaType``, etc. of the "format" schema + for qualified values and link references.
+ """ + if not results or not isinstance(results, dict): + return results + for result in results.values(): + if isinstance(result, dict): + result.pop("format", None) + return results def test_execute_single_output_prefer_header_return_representation_literal(self): proc = "EchoResultsTester" @@ -3576,7 +3593,7 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) results = self.app.get(f"/jobs/{job_id}/results") assert results.content_type.startswith(ContentType.TEXT_PLAIN) assert results.text == "test" - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { "output_data": "test", @@ -3619,7 +3636,7 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) output_json = json.dumps({"data": "test"}, separators=(",", ":")) assert results.content_type.startswith(ContentType.APP_JSON) assert results.text == output_json - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { "output_json": { @@ -3660,11 +3677,10 @@ def test_execute_single_output_prefer_header_return_minimal_literal(self): assert status["status"] == Status.SUCCEEDED job_id = status["jobID"] - out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") assert results.content_type.startswith(ContentType.TEXT_PLAIN) assert results.text == "test" - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { "output_data": "test", @@ -3704,11 +3720,11 @@ def 
test_execute_single_output_prefer_header_return_minimal_complex(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - output_json = json.dumps({"data": "test"}, separators=(",", ":")) assert results.status_code == 204, "No contents expected for minimal reference result." + assert results.body == b"" assert results.content_type.startswith(ContentType.APP_JSON) - assert results.text == output_json - outputs = self.app.get(f"/jobs/{job_id}/outputs") + assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/output.json" + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { "output_json": { @@ -3753,7 +3769,7 @@ def test_execute_single_output_response_raw_value_literal(self): results = self.app.get(f"/jobs/{job_id}/results") assert results.content_type.startswith(ContentType.TEXT_PLAIN) assert results.text == "test" - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -3795,7 +3811,7 @@ def test_execute_single_output_response_raw_value_complex(self): results = self.app.get(f"/jobs/{job_id}/results") assert results.content_type.startswith(ContentType.APP_JSON) assert results.json == {"data": "test"} - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) output_json = json.dumps({"data": "test"}, separators=(",", ":")) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -3844,7 +3860,7 @@ def test_execute_single_output_response_raw_reference_literal(self): assert 
results.body == b"" assert results.content_type.startswith(ContentType.TEXT_PLAIN) assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_data/output.txt" - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -3889,7 +3905,7 @@ def test_execute_single_output_response_raw_reference_complex(self): assert results.body == b"" assert results.content_type.startswith(ContentType.APP_JSON) assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/output.json" - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { @@ -3898,8 +3914,18 @@ def test_execute_single_output_response_raw_reference_complex(self): }, } - # FIXME: Should this be permitted? Technically, a multipart of 1 bounded contents is valid... - def test_execute_single_output_multipart_error(self): + def test_execute_single_output_multipart_accept_data(self): + """ + Validate that requesting multipart for a single output is permitted. + + Although somewhat counter-productive to wrap a single output as multipart, this is technically permitted. + This can be used to "normalize" the response to always be multipart, regardless of the amount outputs + produced by the process job. The output format should be contained within the part. + + .. 
seealso:: + - :func:`test_execute_single_output_multipart_accept_link` + - :func:`test_execute_single_output_multipart_accept_alt_format` + """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) @@ -3918,7 +3944,7 @@ def test_execute_single_output_multipart_error(self): "message": "test" }, "outputs": { - "output_data": {} + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE} } } with contextlib.ExitStack() as stack: @@ -3927,14 +3953,189 @@ def test_execute_single_output_multipart_error(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 406, f"Expected error. Instead got: [{resp.status_code}]\nReason:\n{resp.json}" - assert resp.content_type == ContentType.APP_JSON, "Expect JSON instead of Multipart because of error." - assert resp.json["detail"] == "Multipart is not acceptable for single output results." - assert resp.json["value"] == ContentType.MULTIPART_MIXED - assert resp.json["cause"] == { - "name": "Accept", - "in": "headers", + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + + # validate the results based on original execution request + results = resp + assert ContentType.MULTIPART_MIXED in results.content_type + boundary = parse_kvp(results.content_type)["boundary"][0] + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: + + {output_json} + --{boundary}-- + """) + assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_single_output_multipart_accept_link(self): + """ + Validate that requesting multipart for a single output is permitted. + + Embedded part contains the link instead of the data contents. + + .. 
seealso:: + - :func:`test_execute_single_output_multipart_accept_data` + - :func:`test_execute_single_output_multipart_accept_alt_format` + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + # NOTE: + # no 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header + # without 'Accept' using multipart, it is expected that JSON document is used + exec_headers = { + "Accept": ContentType.MULTIPART_MIXED, + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "mode": ExecuteMode.SYNC, # WARNING: force sync to make sure JSON job status is not returned instead + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {"transmissionMode": ExecuteTransmissionMode.REFERENCE} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + # validate the results based on original execution request + results = resp + assert ContentType.MULTIPART_MIXED in results.content_type + boundary = parse_kvp(results.content_type)["boundary"][0] + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.APP_JSON} + Content-ID: + Content-Location: {out_url}/{job_id}/output_json/output.json + --{boundary}-- + """) + assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.json", + "type": ContentType.APP_JSON, + }, + } + + # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + @pytest.mark.xfail(reason="not implemented") + def test_execute_single_output_multipart_accept_alt_format(self): + """ + Validate the returned contents combining an ``Accept`` header as ``multipart`` and a ``format`` in ``outputs``. + + The main contents of the response should be ``multipart``, but the nested contents should be the transformed + output representation, based on the ``format`` definition. 
+ """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": ContentType.MULTIPART_MIXED, + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "mode": ExecuteMode.SYNC, # WARNING: force sync to make sure JSON job status is not returned instead + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": { + "transmissionMode": ExecuteTransmissionMode.VALUE, # embed in the part contents + "format": {"mediaType": ContentType.APP_YAML}, # request alternate output format + } } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + # validate the results based on original execution request + results = resp + assert ContentType.MULTIPART_MIXED in results.content_type + boundary = parse_kvp(results.content_type)["boundary"][0] + output_json_as_yaml = yaml.safe_dump({"data": "test"}) + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.APP_YAML} + Content-ID: + + {output_json_as_yaml} + --{boundary}-- + """) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) + assert results.text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.yml", + "type": ContentType.APP_YAML, + }, + } + + # validate the results can be obtained with the "real" representation + result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) + output_json = json.dumps({"data": "test"}, separators=(",", ":")) + assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert result_json.content_type == ContentType.APP_JSON + assert result_json.text == output_json @parameterized.expand([ ContentType.MULTIPART_ANY, @@ -3943,6 +4144,10 @@ def test_execute_single_output_multipart_error(self): def test_execute_multi_output_multipart_accept(self, multipart_header): """ Requesting ``multipart`` explicitly should return it instead of default :term:`JSON` ``document`` response. + + .. 
seealso:: + - :func:`test_execute_multi_output_multipart_accept_async_alt_acceptable` + - :func:`test_execute_multi_output_multipart_accept_async_not_acceptable` """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3950,12 +4155,13 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): self.deploy_process(body, process_id=p_id) # NOTE: - # no 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header + # No 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header # without 'Accept' using multipart, it is expected that JSON document is used + # Also, use 'Prefer: wait' to avoid 'respond-async', since async always responds with the Job status. exec_headers = { "Accept": multipart_header, "Content-Type": ContentType.APP_JSON, - "Prefer": "respond-async", + "Prefer": "wait=5", } exec_content = { "inputs": { @@ -4000,7 +4206,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4010,6 +4216,85 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): }, } + def test_execute_multi_output_multipart_accept_async_not_acceptable(self): + """ + When executing the process asynchronously, ``Accept`` with multipart (strictly) is not acceptable. + + Because async execution must respond with a Job Status, the ``Accept`` actually refers to that response, + rather than a results response as returned directly in sync. + + .. 
seealso:: + - :func:`test_execute_multi_output_multipart_accept` + - :func:`test_execute_multi_output_multipart_accept_async_alt_acceptable` + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": ContentType.MULTIPART_MIXED, + "Content-Type": ContentType.APP_JSON, + "Prefer": "respond-async", + } + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": {} + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 406, f"Expected error. Instead got: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.content_type == ContentType.APP_JSON, "Expect JSON instead of Multipart because of error." + assert "Accept header" in resp.json["detail"] + assert resp.json["value"] == ContentType.MULTIPART_MIXED + assert resp.json["cause"] == { + "name": "Accept", + "in": "headers", + } + + def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): + """ + When executing the process asynchronously, ``Accept`` with multipart and an alternative is acceptable. + + Because async requires to respond a Job Status, the ``Accept`` actually refers to that response, + rather than a results response as returned directly in sync. + + .. 
seealso:: + - :func:`test_execute_multi_output_multipart_accept` + - :func:`test_execute_multi_output_multipart_accept_async_not_acceptable` + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": f"{ContentType.MULTIPART_MIXED}, {ContentType.APP_JSON}", + "Content-Type": ContentType.APP_JSON, + "Prefer": "respond-async", + } + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": {} + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.content_type == ContentType.APP_JSON, "Expect JSON Job Status instead of Multipart when running async." + assert "status" in resp.json, "Expected a JSON Job Status response." 
+ def test_execute_multi_output_prefer_header_return_representation(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4065,7 +4350,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4128,7 +4413,7 @@ def test_execute_multi_output_response_raw_value(self): """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4188,7 +4473,7 @@ def test_execute_multi_output_response_raw_reference(self): """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4256,7 +4541,7 @@ def test_execute_multi_output_response_raw_mixed(self): """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert 
outputs.json["outputs"] == { "output_data": "test", @@ -4310,15 +4595,16 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") + results_json = self.remove_result_format(results.json) assert results.content_type.startswith(ContentType.APP_JSON) - assert results.json == { + assert results_json == { "output_data": "test", "output_json": { "href": f"{out_url}/{job_id}/output_json/output.json", "type": ContentType.APP_JSON, }, } - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4373,9 +4659,10 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") + results_json = self.remove_result_format(results.json) output_json = json.dumps({"data": "test"}, separators=(",", ":")) assert results.content_type.startswith(ContentType.APP_JSON) - assert results.json == { + assert results_json == { "output_data": { "href": f"{out_url}/{job_id}/output_text/output.txt", "type": ContentType.TEXT_PLAIN, @@ -4389,7 +4676,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission "type": ContentType.TEXT_PLAIN, }, } - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4443,15 +4730,16 @@ def test_execute_multi_output_response_document_defaults(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = 
self.app.get(f"/jobs/{job_id}/results") + results_json = self.remove_result_format(results.json) assert results.content_type.startswith(ContentType.APP_JSON) - assert results.json == { + assert results_json == { "output_data": "test", "output_json": { "href": f"{out_url}/{job_id}/output_json/output.json", "type": ContentType.APP_JSON, }, } - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", @@ -4503,9 +4791,10 @@ def test_execute_multi_output_response_document_mixed(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") + results_json = self.remove_result_format(results.json) output_json = json.dumps({"data": "test"}, separators=(",", ":")) assert results.content_type.startswith(ContentType.APP_JSON) - assert results.json == { + assert results_json == { "output_data": { "href": f"{out_url}/{job_id}/output_text/output.txt", "type": ContentType.TEXT_PLAIN, @@ -4519,7 +4808,7 @@ def test_execute_multi_output_response_document_mixed(self): "type": ContentType.TEXT_PLAIN, }, } - outputs = self.app.get(f"/jobs/{job_id}/outputs") + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_data": "test", diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 726a85480..b83151fb9 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -179,7 +179,7 @@ def make_job(self, if isinstance(created, str): created = date_parser.parse(created) job = self.job_store.save_job(task_id=task_id, process=process, service=service, is_workflow=False, - user_id=user_id, execute_async=True, 
access=access, created=created) + execute_async=True, user_id=user_id, access=access, created=created) job.status = status if status != Status.ACCEPTED: job.started = job.created + datetime.timedelta(seconds=offset if offset is not None else 0) @@ -1538,7 +1538,7 @@ def test_jobs_inputs_outputs_validations(self): job_none.pop("$id", None) assert job_none == { "inputs": {}, - "outputs": {}, + "outputs": None, "mode": ExecuteMode.AUTO, "response": ExecuteResponse.DOCUMENT } @@ -1578,7 +1578,7 @@ def test_jobs_inputs_outputs_validations(self): job_out_none.pop("$id", None) assert job_out_none == { "inputs": {"random": "ok"}, - "outputs": {}, + "outputs": None, "mode": ExecuteMode.AUTO, "response": ExecuteResponse.DOCUMENT } diff --git a/weaver/datatype.py b/weaver/datatype.py index bbff1a33f..2900c9944 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1001,6 +1001,18 @@ def subscribers(self, subscribers): ) self["subscribers"] = subscribers or None + @property + def accept_type(self): + # type: () -> Optional[str] + return self.get("accept_type") + + @accept_type.setter + def accept_type(self, content_type): + # type: (Optional[Union[str]]) -> None + if not isinstance(content_type, str): + raise TypeError(f"Type 'str' is required for '{self.__name__}.accept_type'") + self["accept_type"] = content_type + @property def accept_language(self): # type: () -> Optional[str] diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index ade7181d1..c1d8740dd 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -75,6 +75,7 @@ from weaver.status import StatusType from weaver.typedefs import ( AnyAcceptLanguageHeader, + AnyHeadersContainer, AnyProcessRef, AnyResponseType, AnyServiceRef, @@ -697,7 +698,16 @@ def submit_job(request, reference, tags=None): accepts_lang = request.accept_language # type: AnyAcceptLanguageHeader matched_lang = accepts_lang.lookup(support_lang, default="") or None if lang and not matched_lang: - 
raise HTTPNotAcceptable(f"Requested language [{lang}] not in supported languages [{sorted(support_lang)}].") + raise HTTPNotAcceptable( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "NotAcceptable", + "title": "Execution request is not acceptable.", + "detail": f"Requested language [{lang}] not in supported languages [{sorted(support_lang)}].", + "status": HTTPNotAcceptable.code, + "cause": {"name": "Accept-Language", "in": "headers"}, + "value": repr_json(lang, force_string=False), + }) + ) lang = matched_lang elif isinstance(reference, Service): service_url = reference.url @@ -776,22 +786,25 @@ def submit_job_handler(payload, # type: ProcessExecution exec_max_wait = settings.get("weaver.execute_sync_max_wait", settings.get("weaver.exec_sync_max_wait")) exec_max_wait = as_int(exec_max_wait, default=20) mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, exec_max_wait) - get_header("prefer", headers, pop=True) # don't care about value, just ensure removed with any header container if not applied: # whatever returned is a default, consider 'mode' in body as alternative is_execute_async = ExecuteMode.get(json_body.get("mode")) != ExecuteMode.SYNC # convert auto to async else: # as per https://datatracker.ietf.org/doc/html/rfc7240#section-2 # Prefer header not resolved with a valid value should still resume without error is_execute_async = mode != ExecuteMode.SYNC + accept_type = validate_job_accept_header(headers, mode) + get_header("prefer", headers, pop=True) # don't care about value, just ensure removed with any header container + exec_resp = json_body.get("response") subscribers = map_job_subscribers(json_body, settings) - + job_inputs = json_body.get("inputs") + job_outputs = json_body.get("outputs") store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=Status.ACCEPTED, process=process, service=provider_id, - inputs=json_body.get("inputs"), outputs=json_body.get("outputs"), - 
is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, context=context, - execute_async=is_execute_async, execute_response=exec_resp, - custom_tags=tags, accept_language=language, subscribers=subscribers) + inputs=job_inputs, outputs=job_outputs, is_workflow=is_workflow, is_local=is_local, + execute_async=is_execute_async, execute_response=exec_resp, custom_tags=tags, user_id=user, + access=visibility, context=context, subscribers=subscribers, + accept_type=accept_type, accept_language=language) job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0) job = store.update_job(job) location_url = job.status_url(settings) @@ -839,6 +852,36 @@ def submit_job_handler(payload, # type: ProcessExecution return resp +def validate_job_accept_header(headers, execution_mode): + # type: (AnyHeadersContainer, ExecuteMode) -> Optional[str] + """ + Validate that the submitted ``Accept`` header is permitted. + """ + accept = get_header("accept", headers) + if not accept: + return + # compare with 'in' to allow alternate types, one of which must be JSON for async + if ContentType.APP_JSON in accept: + return ContentType.APP_JSON + # anything always allowed in sync, since results returned directly + if execution_mode == ExecuteMode.SYNC: + return accept + raise HTTPNotAcceptable( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "NotAcceptable", + "title": "Execution request is not acceptable.", + "detail": ( + "When running asynchronously, the Accept header must correspond " + "to the Job Status response instead of the desired Result response " + "returned when executing synchronously." 
+ ), + "status": HTTPNotAcceptable.code, + "cause": {"name": "Accept", "in": "headers"}, + "value": repr_json(accept, force_string=False), + }) + ) + + def validate_process_io(process, payload): # type: (Process, ProcessExecution) -> None """ diff --git a/weaver/store/base.py b/weaver/store/base.py index f4ada87e6..624557117 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -181,6 +181,7 @@ def save_job(self, access=None, # type: Optional[AnyVisibility] context=None, # type: Optional[str] subscribers=None, # type: Optional[ExecutionSubscribers] + accept_type=None, # type: Optional[str] accept_language=None, # type: Optional[str] created=None, # type: Optional[datetime.datetime] ): # type: (...) -> Job diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 3b7c80cc4..568e0df08 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -797,6 +797,7 @@ def save_job(self, access=None, # type: Optional[AnyVisibility] context=None, # type: Optional[str] subscribers=None, # type: Optional[ExecutionSubscribers] + accept_type=None, # type: Optional[str] accept_language=None, # type: Optional[str] created=None, # type: Optional[datetime.datetime] ): # type: (...) 
-> Job @@ -837,6 +838,7 @@ def save_job(self, "access": access, "context": context, "subscribers": subscribers, + "accept_type": accept_type, "accept_language": accept_language, }) self.collection.insert_one(new_job.params()) diff --git a/weaver/wps_restapi/processes/processes.py b/weaver/wps_restapi/processes/processes.py index 05248e626..2ccf2f550 100644 --- a/weaver/wps_restapi/processes/processes.py +++ b/weaver/wps_restapi/processes/processes.py @@ -456,18 +456,18 @@ def delete_local_process(request): renderer=OutputFormat.JSON, response_schemas=sd.post_process_jobs_responses, ) -@sd.process_jobs_service.post( +@sd.process_execution_service.post( tags=[sd.TAG_PROCESSES, sd.TAG_EXECUTE, sd.TAG_JOBS], - content_type=list(ContentType.ANY_XML), - schema=sd.PostProcessJobsEndpointXML(), - accept=ContentType.APP_JSON, + content_type=ContentType.APP_JSON, + schema=sd.PostProcessJobsEndpointJSON(), + # accept=ContentType.APP_JSON, # allow anything for single-output or multipart combinations, validated downstream renderer=OutputFormat.JSON, response_schemas=sd.post_process_jobs_responses, ) -@sd.process_execution_service.post( +@sd.process_jobs_service.post( tags=[sd.TAG_PROCESSES, sd.TAG_EXECUTE, sd.TAG_JOBS], - content_type=ContentType.APP_JSON, - schema=sd.PostProcessJobsEndpointJSON(), + content_type=list(ContentType.ANY_XML), + schema=sd.PostProcessJobsEndpointXML(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, response_schemas=sd.post_process_jobs_responses, @@ -476,7 +476,7 @@ def delete_local_process(request): tags=[sd.TAG_PROCESSES, sd.TAG_EXECUTE, sd.TAG_JOBS], content_type=ContentType.APP_JSON, schema=sd.PostProcessJobsEndpointJSON(), - accept=ContentType.APP_JSON, + # accept=ContentType.APP_JSON, # allow anything for single-output or multipart combinations, validated downstream renderer=OutputFormat.JSON, response_schemas=sd.post_process_jobs_responses, ) diff --git a/weaver/wps_restapi/swagger_definitions.py 
b/weaver/wps_restapi/swagger_definitions.py index 29816937e..8aa202222 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -4130,7 +4130,7 @@ class ExecuteInputOutputs(ExtendedMappingSchema): # It is **VERY** important to use 'default={}' and not 'missing=drop' contrary to other optional fields. # Using 'drop' causes and invalid input definition to be ignored/removed and not be validated for expected schema. # We want to ensure format is validated if present to rapidly report the issue and not move on to full execution. - # If 'inputs' are indeed omitted, the default with match against and empty 'ExecuteInputMapValues' schema. + # If 'inputs' are indeed omitted, the default will match against an empty 'ExecuteInputMapValues' schema. # If 'inputs' are explicitly provided as '{}' or '[]', it will also behave the right way for no-inputs process. # # See tests validating both cases (incorrect schema vs optionals inputs): @@ -4146,7 +4146,10 @@ class ExecuteInputOutputs(ExtendedMappingSchema): "the 'response' type, and the execution 'mode' provided " f"(see for more details: {DOC_URL}/processes.html#execution-body)." ), - default={} + # NOTE: + # Explicitly submitted {} or [] means that *no outputs* are requested. + # This must be distinguished from 'all outputs' requested, which is done by omitting 'outputs' field entirely. 
+ default=None, ) From 8624a9039a0c96688275cf966b9436760ef7d17d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 00:38:41 -0400 Subject: [PATCH 19/75] [wip] setup multipart function --- weaver/datatype.py | 6 +-- weaver/formats.py | 1 + weaver/processes/convert.py | 11 +++-- weaver/wps_restapi/jobs/utils.py | 70 +++++++++++++++++++++++++------- 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/weaver/datatype.py b/weaver/datatype.py index 2900c9944..68db96b67 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -898,7 +898,7 @@ def type(self): return "provider" def _get_inputs(self): - # type: () -> Optional[ExecutionInputs] + # type: () -> ExecutionInputs if self.get("inputs") is None: return {} return dict.__getitem__(self, "inputs") @@ -912,9 +912,7 @@ def _set_inputs(self, inputs): def _get_outputs(self): # type: () -> Optional[ExecutionOutputs] - if self.get("outputs") is None: - return {} - return dict.__getitem__(self, "outputs") + return self.get("outputs") def _set_outputs(self, outputs): # type: (Optional[ExecutionOutputs]) -> None diff --git a/weaver/formats.py b/weaver/formats.py index 84025e39c..f223b6000 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -137,6 +137,7 @@ class ContentType(Constants): } ANY_CWL = {APP_CWL, APP_CWL_JSON, APP_CWL_YAML, APP_CWL_X} ANY_XML = {APP_XML, TEXT_XML} + ANY_MULTIPART = {MULTIPART_ANY, MULTIPART_FORM, MULTIPART_MIXED, MULTIPART_RELATED} ANY = "*/*" diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index dc9c749de..00abd9ea5 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -150,6 +150,7 @@ ExecutionOutputsList, ExecutionOutputsMap, JobValueFile, + JobValueItem, JSON, OpenAPISchema, OpenAPISchemaArray, @@ -1865,7 +1866,7 @@ def convert_input_values_schema(inputs, schema): raise ValueError(f"Unknown conversion method to schema [{schema}] for inputs of type [{name}]: {inputs}") if schema == 
JobInputsOutputsSchema.OGC: input_dict = {} - for input_item in inputs: + for input_item in inputs: # type: JobValueItem input_id = get_any_id(input_item, pop=True) input_val = get_any_value(input_item) input_key = get_any_value(input_item, key=True, data=True, file=False) @@ -1909,18 +1910,18 @@ def convert_input_values_schema(inputs, schema): @overload def convert_output_params_schema(inputs, schema): - # type: (ExecutionOutputs, JobInputsOutputsSchema.OGC) -> ExecutionOutputsMap + # type: (Optional[ExecutionOutputs], JobInputsOutputsSchema.OGC) -> Optional[ExecutionOutputsMap] ... @overload def convert_output_params_schema(inputs, schema): - # type: (ExecutionOutputs, JobInputsOutputsSchema.OLD) -> ExecutionOutputsList + # type: (Optional[ExecutionOutputs], JobInputsOutputsSchema.OLD) -> Optional[ExecutionOutputsList] ... def convert_output_params_schema(outputs, schema): - # type: (ExecutionOutputs, JobInputsOutputsSchemaType) -> ExecutionOutputs + # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaType) -> Optional[ExecutionOutputs] """ Convert execution output parameters between equivalent formats. @@ -1936,6 +1937,8 @@ def convert_output_params_schema(outputs, schema): :param schema: Desired schema. :return: Converted outputs. 
""" + if outputs is None: + return outputs if isinstance(schema, str): schema = schema.lower().split("+")[0] if ( diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index e9a9f4f33..e43d5fde2 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -2,6 +2,8 @@ import os import shutil from copy import deepcopy +from email.message import MIMEPart +from email.mime.multipart import MIMEMultipart from typing import TYPE_CHECKING, cast import colander @@ -13,11 +15,11 @@ HTTPInternalServerError, HTTPNoContent, HTTPNotFound, - HTTPNotImplemented, HTTPOk ) from pyramid.response import FileResponse from pyramid_celery import celery_app +from requests.structures import CaseInsensitiveDict from weaver.database import get_db from weaver.datatype import Job, Process @@ -352,6 +354,15 @@ def get_results( # pylint: disable=R1260 out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC) if link_references else {} references = {} for result in job.results: + # Filter outputs not requested, unless 'all' requested by omitting + out_id = get_any_id(result) + if ( + (isinstance(job.outputs, dict) and out_id not in job.outputs) or + (isinstance(job.outputs, list) and not any(get_any_id(out) == out_id for out in job.outputs)) + ): + LOGGER.debug("Removing [%s] from %s results response because not requested.", out_id, job) + continue + # Complex result could contain both 'data' and a reference (eg: JSON file and its direct representation). # Literal result is only by itself. Therefore, find applicable field by non 'data' match. 
rtype = "href" if get_any_value(result, key=True, file=True, data=False) else "data" @@ -375,7 +386,6 @@ def get_results( # pylint: disable=R1260 rtype = "href" if get_any_value(val_item, key=True, file=True, data=False) else "data" val_data = get_any_value(val_item, file=True, data=False) out_key = rtype - out_id = get_any_id(result) out_mode = out_ref.get(out_id, {}).get("transmissionMode") as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE if rtype == "href" and isinstance(val_data, str): @@ -516,9 +526,11 @@ def get_job_results_response( results, refs = get_results(job, container, value_key="value", schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details link_references=is_raw) - headers = headers or {} - if "location" not in headers: - headers["Location"] = job.status_url(container) + + headers = CaseInsensitiveDict(headers or {}) + if "Location" in headers: + headers.setdefault("Content-Location", headers.pop("Location")) + headers.setdefault("Content-Location", job.status_url(container)) if not is_raw: try: @@ -561,15 +573,12 @@ def get_job_results_response( out_type = get_any_value(out_info, key=True) out_data = get_any_value(out_info) - # FIXME: https://github.com/crim-ca/weaver/issues/376 - # implement multipart, both for multi-output IDs and array-output under same ID - if len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1): - # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi - raise HTTPNotImplemented(json={ - "code": "NotImplemented", - "type": "NotImplemented", - "detail": "Multipart results with 'transmissionMode=value' and 'response=raw' not implemented.", - }) + if ( + len(results) > 1 or + (isinstance(out_data, list) and len(out_data) > 1) or + (isinstance(job.accept_type, str) and any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART)) + ): + return get_job_results_multipart(job, results) # single value only out_data = 
out_data[0] if isinstance(out_data, list) else out_data @@ -591,6 +600,39 @@ def get_job_results_response( return resp +def get_job_results_multipart(job, results): + # type: (Job, ExecutionResults) -> HTTPOk + """ + Generates the :term:`Job` results multipart response from available or requested outputs. + + .. seealso:: + Function :func:`get_results` should be used to avoid re-processing all output format combinations. + + :param job: + :param results: Pre-filtered and pre-processed results in a normalized format structure. + """ + # FIXME: https://github.com/crim-ca/weaver/issues/376 + # implement multipart, both for multi-output IDs and array-output under same ID + multi = MIMEMultipart() + for res_id, result in results.items(): + part = MIMEPart() + part.add_header() # other ? content-disposition filename from output ID? + # ctype header + part.set_type() + part.set_charset() + part.set_param() # in ctype + # data + part.set_payload() + multi.attach(part) + + resp = HTTPOk( + detail=f"Multipart Response for {job}", + headers={"Content-Type": multi.get_content_type()}, + ) + resp.body = multi.as_bytes() + return resp + + def get_job_submission_response(body, headers, error=False): # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated, HTTPBadRequest] """ From 884abf18373890e1c9cc9c2e8d44dac176ced40a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 15:57:29 -0400 Subject: [PATCH 20/75] [wip] multipart result + typing fixes --- tests/test_formats.py | 24 ++++++- weaver/datatype.py | 53 ++++++++++----- weaver/execute.py | 111 ++++++++++++++++++------------- weaver/formats.py | 2 +- weaver/utils.py | 27 +++++--- weaver/wps_restapi/jobs/utils.py | 18 +++-- 6 files changed, 155 insertions(+), 80 deletions(-) diff --git a/tests/test_formats.py b/tests/test_formats.py index d381f61a7..874716f35 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -4,6 +4,7 @@ import itertools import os import uuid +from 
typing import TYPE_CHECKING from urllib.error import URLError from urllib.request import urlopen @@ -18,6 +19,9 @@ from weaver import formats as f from weaver.utils import null, request_extra +if TYPE_CHECKING: + from weaver.formats import AnyContentEncoding, FileModeSteamType + _ALLOWED_MEDIA_TYPE_CATEGORIES = [ "application", "archives", @@ -204,6 +208,7 @@ def test_content_encoding_is_binary(encoding, expect): ) ) def test_content_encoding_open_parameters(encoding, mode): + # type: (AnyContentEncoding, FileModeSteamType) -> None result = f.ContentEncoding.open_parameters(encoding, mode) if encoding == f.ContentEncoding.UTF_8: assert result[0] == mode @@ -230,7 +235,6 @@ def test_get_format(test_content_type, expected_content_type, expected_content_e @pytest.mark.parametrize( "test_extension", [ - f.ContentType.APP_OCTET_STREAM, f.ContentType.APP_FORM, f.ContentType.MULTIPART_FORM, ] @@ -287,7 +291,6 @@ def test_get_format_media_type_from_schema(test_format, expect_media_type): itertools.product( ["", None], [ - f.ContentType.APP_OCTET_STREAM, f.ContentType.APP_FORM, f.ContentType.MULTIPART_FORM, ] @@ -299,6 +302,23 @@ def test_get_format_default_no_extension(test_extension, default_content_type): assert fmt.extension == "" +@pytest.mark.parametrize( + ["test_extension", "default_extension"], + [ + ("", f.ContentType.APP_OCTET_STREAM), + (None, f.ContentType.APP_OCTET_STREAM), + (f.ContentType.APP_OCTET_STREAM, None), + ] +) +def test_get_format_binary_extension(test_extension, default_extension): + """ + .. 
versionchanged:: 5.10.0 + """ + fmt = f.get_format(test_extension, default=default_extension) + assert fmt == Format(f.ContentType.APP_OCTET_STREAM, extension=".bin") + assert fmt.extension == ".bin" + + @pytest.mark.parametrize( ["cwl_format", "expect_media_type"], [ diff --git a/weaver/datatype.py b/weaver/datatype.py index 68db96b67..eba5acd96 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -19,7 +19,7 @@ from io import BytesIO from logging import ERROR, INFO, getLevelName, getLogger from secrets import compare_digest, token_hex -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from urllib.parse import urljoin, urlparse import colander @@ -210,11 +210,14 @@ class Data(AutoBase): """ def __new__(cls, *args, **kwargs): extra_props = set(dir(cls)) - set(dir(DictBase)) - auto_cls = DictBase.__new__(cls, *args, **kwargs) + auto_cls = cast( + "AutoBase", + DictBase.__new__(cls, *args, **kwargs) + ) for prop in extra_props: prop_func = property( - lambda self, key: dict.__getitem__(self, key), - lambda self, key, value: dict.__setattr__(self, key, value) + lambda self, key: dict.__getitem__(self, key), # type: ignore + lambda self, key, value: dict.__setattr__(self, key, value) # type: ignore ) default = getattr(auto_cls, prop, None) setattr(auto_cls, prop, prop_func) @@ -908,7 +911,11 @@ def _set_inputs(self, inputs): self["inputs"] = inputs # allows to correctly update list by ref using 'job.inputs.extend()' - inputs = property(_get_inputs, _set_inputs, doc="Input values and reference submitted for execution.") + inputs = property( + _get_inputs, # type: ignore + _set_inputs, # type: ignore + doc="Input values and reference submitted for execution.", + ) def _get_outputs(self): # type: () -> Optional[ExecutionOutputs] @@ -918,7 +925,11 @@ def _set_outputs(self, outputs): # type: (Optional[ExecutionOutputs]) -> None self["outputs"] = outputs - outputs = property(_get_outputs, _set_outputs, doc="Output transmission modes submitted 
for execution.") + outputs = property( + _get_outputs, # type: ignore + _set_outputs, # type: ignore + doc="Output transmission modes submitted for execution.", + ) @property def user_id(self): @@ -1119,7 +1130,7 @@ def _get_updated(self): created = LocalizedDateTimeProperty(default_now=True) started = LocalizedDateTimeProperty() finished = LocalizedDateTimeProperty() - updated = LocalizedDateTimeProperty(fget=_get_updated) + updated = LocalizedDateTimeProperty(fget=_get_updated) # type: ignore @property def duration(self): @@ -1179,7 +1190,11 @@ def _set_results(self, results): self["results"] = results # allows to correctly update list by ref using 'job.results.extend()' - results = property(_get_results, _set_results, doc="Output values and references that resulted from execution.") + results = property( + _get_results, # type: ignore + _set_results, # type: ignore + doc="Output values and references that resulted from execution.", + ) def _get_exceptions(self): # type: () -> List[Union[str, Dict[str, str]]] @@ -1194,7 +1209,7 @@ def _set_exceptions(self, exceptions): self["exceptions"] = exceptions # allows to correctly update list by ref using 'job.exceptions.extend()' - exceptions = property(_get_exceptions, _set_exceptions) + exceptions = property(_get_exceptions, _set_exceptions) # type: ignore def _get_logs(self): # type: () -> List[str] @@ -1209,7 +1224,7 @@ def _set_logs(self, logs): self["logs"] = logs # allows to correctly update list by ref using 'job.logs.extend()' - logs = property(_get_logs, _set_logs) + logs = property(_get_logs, _set_logs) # type: ignore def _get_tags(self): # type: () -> List[Optional[str]] @@ -1224,7 +1239,7 @@ def _set_tags(self, tags): self["tags"] = tags # allows to correctly update list by ref using 'job.tags.extend()' - tags = property(_get_tags, _set_tags) + tags = property(_get_tags, _set_tags) # type: ignore @property def access(self): @@ -1431,7 +1446,7 @@ def json(self, container=None): # pylint: 
disable=W0221,arguments-differ "estimatedCompletion": None, "percentCompleted": self.progress, # new name as per OGC-API, enforced integer - # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/statusInfo.yaml + # https://schemas.opengis.net/ogcapi/processes/part1/1.0/openapi/schemas/statusInfo.yaml "progress": int(self.progress), "links": self.links(settings, self_link="status") } @@ -1956,9 +1971,11 @@ def _set_id(self, _id): # type: (str) -> None self["id"] = _id - id = identifier = property(fget=_get_id, fset=_set_id, doc=( - "Unique process identifier with optional version number if it corresponds to an older revision." - )) + id = identifier = property( + _get_id, # type: ignore + _set_id, # type: ignore + doc="Unique process identifier with optional version number if it corresponds to an older revision.", + ) @classmethod def split_version(cls, process_id): @@ -2031,7 +2048,11 @@ def _set_desc(self, description): # type: (str) -> None self["abstract"] = description - description = abstract = property(fget=_get_desc, fset=_set_desc, doc="Process description.") + description = abstract = property( + fget=_get_desc, # type: ignore + fset=_set_desc, # type: ignore + doc="Process description.", + ) @property def keywords(self): diff --git a/weaver/execute.py b/weaver/execute.py index 4a1e427be..a3649ca63 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -7,22 +7,67 @@ from weaver.utils import get_header, parse_kvp if TYPE_CHECKING: - from typing import List, Optional, Tuple + from typing import List, Optional, Union, Tuple - from weaver.typedefs import AnyHeadersContainer, HeadersType + from weaver.typedefs import AnyHeadersContainer, HeadersType, Literal + + ExecutionModeAutoType = Literal["auto"] + ExecutionModeAsyncType = Literal["async"] + ExecutionModeSyncType = Literal["sync"] + AnyExecuteMode = Union[ + ExecutionModeAutoType, + ExecutionModeAsyncType, + ExecutionModeSyncType, + ] + 
ExecuteControlOptionAsyncType = Literal["async-execute"] + ExecuteControlOptionSyncType = Literal["sync-execute"] + AnyExecuteControlOption = Union[ + ExecuteControlOptionAsyncType, + ExecuteControlOptionSyncType, + ] + ExecuteReturnPreferenceMinimalType = Literal["minimal"] + ExecuteReturnPreferenceRepresentationType = Literal["representation"] + AnyExecuteReturnPreference = Union[ + ExecuteReturnPreferenceMinimalType, + ExecuteReturnPreferenceRepresentationType, + ] + ExecuteResponseDocumentType = Literal["document"] + ExecuteResponseRawType = Literal["raw"] + AnyExecuteResponse = Union[ + ExecuteResponseDocumentType, + ExecuteResponseRawType, + ] + ExecuteTransmissionModeReferenceType = Literal["reference"] + ExecuteTransmissionModeValueType = Literal["value"] + AnyExecuteTransmissionMode = Union[ + ExecuteTransmissionModeReferenceType, + ExecuteTransmissionModeValueType, + ] + ExecuteCollectionFormatType_STAC = Literal["stac-collection"] + ExecuteCollectionFormatType_OGC_COVERAGE = Literal["ogc-coverage-collection"] + ExecuteCollectionFormatType_OGC_FEATURES = Literal["ogc-features-collection"] + ExecuteCollectionFormatType_OGC_MAP = Literal["ogc-map-collection"] + ExecuteCollectionFormatType_GEOJSON = Literal["geojson-feature-collection"] + AnyExecuteCollectionFormat = Union[ + ExecuteCollectionFormatType_STAC, + ExecuteCollectionFormatType_OGC_COVERAGE, + ExecuteCollectionFormatType_OGC_FEATURES, + ExecuteCollectionFormatType_OGC_MAP, + ExecuteCollectionFormatType_GEOJSON, + ] LOGGER = logging.getLogger(__name__) class ExecuteMode(Constants): - AUTO = "auto" - ASYNC = "async" - SYNC = "sync" + AUTO = "auto" # type: ExecutionModeAutoType + ASYNC = "async" # type: ExecutionModeAsyncType + SYNC = "sync" # type: ExecutionModeSyncType class ExecuteControlOption(Constants): - ASYNC = "async-execute" - SYNC = "sync-execute" + ASYNC = "async-execute" # type: ExecuteControlOptionAsyncType + SYNC = "sync-execute" # type: ExecuteControlOptionSyncType @classmethod def 
values(cls): @@ -34,58 +79,30 @@ def values(cls): class ExecuteReturnPreference(Constants): - MINIMAL = "minimal" - REPRESENTATION = "representation" + MINIMAL = "minimal" # type: ExecuteReturnPreferenceMinimalType + REPRESENTATION = "representation" # type: ExecuteReturnPreferenceRepresentationType class ExecuteResponse(Constants): - RAW = "raw" - DOCUMENT = "document" + RAW = "raw" # type: ExecuteResponseRawType + DOCUMENT = "document" # type: ExecuteResponseDocumentType class ExecuteTransmissionMode(Constants): - VALUE = "value" - REFERENCE = "reference" + VALUE = "value" # type: ExecuteTransmissionModeValueType + REFERENCE = "reference" # type: ExecuteTransmissionModeReferenceType class ExecuteCollectionFormat(Constants): - STAC = "stac-collection" - OGC_COVERAGE = "ogc-coverage-collection" - OGC_FEATURES = "ogc-features-collection" - OGC_MAP = "ogc-map-collection" - GEOJSON = "geojson-feature-collection" - - -if TYPE_CHECKING: - from weaver.typedefs import Literal - - AnyExecuteMode = Literal[ - ExecuteMode.ASYNC, - ExecuteMode.SYNC, - ] - AnyExecuteControlOption = Literal[ - ExecuteControlOption.ASYNC, - ExecuteControlOption.SYNC, - ] - AnyExecuteResponse = Literal[ - ExecuteResponse.DOCUMENT, - ExecuteResponse.RAW, - ] - AnyExecuteTransmissionMode = Literal[ - ExecuteTransmissionMode.REFERENCE, - ExecuteTransmissionMode.VALUE, - ] - AnyExecuteCollectionFormat = Literal[ - ExecuteCollectionFormat.STAC, - ExecuteCollectionFormat.OGC_COVERAGE, - ExecuteCollectionFormat.OGC_FEATURES, - ExecuteCollectionFormat.OGC_MAP, - ExecuteCollectionFormat.GEOJSON, - ] + STAC = "stac-collection" # type: ExecuteCollectionFormatType_STAC + OGC_COVERAGE = "ogc-coverage-collection" # type: ExecuteCollectionFormatType_OGC_COVERAGE + OGC_FEATURES = "ogc-features-collection" # type: ExecuteCollectionFormatType_OGC_FEATURES + OGC_MAP = "ogc-map-collection" # type: ExecuteCollectionFormatType_OGC_MAP + GEOJSON = "geojson-feature-collection" # type: ExecuteCollectionFormatType_GEOJSON 
def parse_prefer_header_return(headers): - # type: (AnyHeadersContainer) -> Optional[ExecuteReturnPreference] + # type: (AnyHeadersContainer) -> Optional[AnyExecuteReturnPreference] """ Get the return preference if specified. """ diff --git a/weaver/formats.py b/weaver/formats.py index f223b6000..aae073b73 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -449,7 +449,7 @@ class SchemaRole(Constants): ContentType.IMAGE_TIFF: ".tif", # common alternate to .tiff ContentType.ANY: ".*", # any for glob ContentType.APP_DIR: "/", # force href to finish with explicit '/' to mark directory - ContentType.APP_OCTET_STREAM: "", + ContentType.APP_OCTET_STREAM: ".bin", ContentType.APP_FORM: "", ContentType.MULTIPART_FORM: "", } diff --git a/weaver/utils.py b/weaver/utils.py index d6d39086d..f3bdba199 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -1184,21 +1184,23 @@ def get_file_header_datetime(dt): return dt_str -def get_href_headers(path, # type: str - download_headers=False, # type: bool - location_headers=True, # type: bool - content_headers=False, # type: bool - content_type=None, # type: Optional[str] - settings=None, # type: Optional[SettingsType] - **option_kwargs, # type: Unpack[Union[SchemeOptions, RequestOptions]] - ): # type: (...) -> MetadataResult +def get_href_headers( + path, # type: str + download_headers=False, # type: bool + location_headers=True, # type: bool + content_headers=False, # type: bool + content_type=None, # type: Optional[str] + content_disposition_type="attachment", # type: Literal["attachment", "inline"] + settings=None, # type: Optional[SettingsType] + **option_kwargs, # type: Unpack[Union[SchemeOptions, RequestOptions]] + ): # type: (...) -> MetadataResult """ Obtain headers applicable for the provided file or directory reference. :rtype: object :param path: File to describe. Either a local path or remote URL. 
:param download_headers: - If enabled, add the ``Content-Disposition`` header with attachment filename for downloading the file. + If enabled, add the ``Content-Disposition`` header with attachment/inline filename for downloading the file. If the reference is a directory, this parameter is ignored, since files must be retrieved individually. :param location_headers: If enabled, add the ``Content-Location`` header referring to the input location. @@ -1207,6 +1209,8 @@ def get_href_headers(path, # type: str Explicit ``Content-Type`` to provide. Otherwise, use default guessed by file system (often ``application/octet-stream``). If the reference is a directory, this parameter is ignored and ``application/directory`` will be enforced. + :param content_disposition_type: + Whether ``inline`` or ``attachment`` should be used, when enabled by :paramref:`download_headers`. :param settings: Application settings to pass down to relevant utility functions. :return: Headers for the reference. """ @@ -1272,8 +1276,11 @@ def get_href_headers(path, # type: str "Content-Length": str(f_size), }) if download_headers: + if os.path.splitext(path)[-1] in ["", "."]: + f_ext = get_extension(f_type, dot=True) + path = f"{path}{f_ext}" headers.update({ - "Content-Disposition": f"attachment; filename=\"{os.path.basename(path)}\"", + "Content-Disposition": f"{content_disposition_type}; filename=\"{os.path.basename(path)}\"", }) f_current = get_file_header_datetime(now()) f_modified = get_file_header_datetime(f_modified) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index e43d5fde2..631e1a2e6 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -61,6 +61,7 @@ if TYPE_CHECKING: from typing import Any, Dict, List, Optional, Tuple, Union + from weaver.execute import AnyExecuteResponse from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( AnyHeadersContainer, @@ -452,7 +453,7 @@ def 
get_results( # pylint: disable=R1260 def get_job_return(job, body=None, headers=None): - # type: (Job, Optional[JSON], Optional[AnyHeadersContainer]) -> ExecuteResponse + # type: (Job, Optional[JSON], Optional[AnyHeadersContainer]) -> AnyExecuteResponse """ Obtain the :term:`Job` result representation based on the resolution order of preferences and request parameters. """ @@ -615,10 +616,19 @@ def get_job_results_multipart(job, results): # implement multipart, both for multi-output IDs and array-output under same ID multi = MIMEMultipart() for res_id, result in results.items(): + key = get_any_value(result, key=True) + val = get_any_value(result) + if key == "href": + typ = result.get("type") or ContentType.APP_OCTET_STREAM + res_headers = get_href_headers(val, download_headers=True, content_headers=True, content_type=typ) + else: + typ = ContentType.TEXT_PLAIN + + get_href_headers() + fmt = get_format() part = MIMEPart() - part.add_header() # other ? content-disposition filename from output ID? 
- # ctype header - part.set_type() + part.add_header("Content-Disposition", "inline", filename=f"{res_id}{ext}") + part.set_type(typ) part.set_charset() part.set_param() # in ctype # data From fddb3b3815837dddbebd895f0bd0cae76a3ff9c8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 21 Sep 2024 03:42:15 -0400 Subject: [PATCH 21/75] [wip] more setup of job results with multipart --- tests/functional/test_wps_package.py | 67 +++++++------- weaver/datatype.py | 25 ++++- weaver/processes/execution.py | 5 +- weaver/utils.py | 40 +++++++- weaver/wps_restapi/jobs/utils.py | 134 +++++++++++++++++++-------- 5 files changed, 192 insertions(+), 79 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 8c82464f9..6c8d43ebb 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3486,7 +3486,7 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_ } status_path = os.path.join(resources.FUNCTIONAL_APP_PKG, "Finch_EnsembleGridPointWetdays/status.xml") status_url = f"{resources.TEST_REMOTE_SERVER_URL}/status.xml" - output_log_url = f"{resources.TEST_REMOTE_SERVER_URL}/output.txt" + output_log_url = f"{resources.TEST_REMOTE_SERVER_URL}/result.txt" output_zip_url = f"{resources.TEST_REMOTE_SERVER_URL}/output.zip" with open(status_path, mode="r", encoding="utf-8") as status_file: status_body = status_file.read().format( @@ -3640,7 +3640,7 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -3723,12 +3723,12 @@ def test_execute_single_output_prefer_header_return_minimal_complex(self): assert results.status_code == 204, "No contents expected for minimal reference 
result." assert results.body == b"" assert results.content_type.startswith(ContentType.APP_JSON) - assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/output.json" + assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/result.json" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -3859,7 +3859,7 @@ def test_execute_single_output_response_raw_reference_literal(self): assert results.status_code == 204, "No contents expected for single reference result." assert results.body == b"" assert results.content_type.startswith(ContentType.TEXT_PLAIN) - assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_data/output.txt" + assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_data/result.txt" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -3904,12 +3904,12 @@ def test_execute_single_output_response_raw_reference_complex(self): assert results.status_code == 204, "No contents expected for single reference result." 
assert results.body == b"" assert results.content_type.startswith(ContentType.APP_JSON) - assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/output.json" + assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/result.json" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -3963,6 +3963,7 @@ def test_execute_single_output_multipart_accept_data(self): ) job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) # validate the results based on original execution request results = resp @@ -3982,7 +3983,7 @@ def test_execute_single_output_multipart_accept_data(self): assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4044,7 +4045,7 @@ def test_execute_single_output_multipart_accept_link(self): --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: - Content-Location: {out_url}/{job_id}/output_json/output.json + Content-Location: {out_url}/{job_id}/output_json/result.json --{boundary}-- """) assert results.text == results_body @@ -4052,7 +4053,7 @@ def test_execute_single_output_multipart_accept_link(self): assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": 
ContentType.APP_JSON, }, } @@ -4181,7 +4182,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4201,7 +4202,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: - Content-Location: {out_url}/{job_id}/output_json/output.json + Content-Location: {out_url}/{job_id}/output_json/result.json --{boundary}-- """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) @@ -4211,7 +4212,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4355,7 +4356,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4418,7 +4419,7 @@ def test_execute_multi_output_response_raw_value(self): assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4464,11 +4465,11 @@ def test_execute_multi_output_response_raw_reference(self): --{boundary} 
Content-Type: {ContentType.TEXT_PLAIN} Content-ID: - Content-Location: {out_url}/{job_id}/output_data/output.txt + Content-Location: {out_url}/{job_id}/output_data/result.txt --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: - Content-Location: {out_url}/{job_id}/output_json/output.json + Content-Location: {out_url}/{job_id}/output_json/result.json --{boundary}-- """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) @@ -4478,7 +4479,7 @@ def test_execute_multi_output_response_raw_reference(self): assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4531,7 +4532,7 @@ def test_execute_multi_output_response_raw_mixed(self): --{boundary} Content-Type: {ContentType.TEXT_PLAIN} Content-ID: - Content-Location: {out_url}/{job_id}/output_text/output.txt + Content-Location: {out_url}/{job_id}/output_text/result.txt --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: @@ -4546,11 +4547,11 @@ def test_execute_multi_output_response_raw_mixed(self): assert outputs.json["outputs"] == { "output_data": "test", "output_text": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4600,7 +4601,7 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): assert results_json == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4609,7 +4610,7 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): assert outputs.json["outputs"] == { 
"output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4664,7 +4665,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission assert results.content_type.startswith(ContentType.APP_JSON) assert results_json == { "output_data": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, "output_json": { @@ -4672,7 +4673,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission "mediaType": ContentType.APP_JSON, }, "output_text": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, } @@ -4681,11 +4682,11 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, "output_text": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, } @@ -4735,7 +4736,7 @@ def test_execute_multi_output_response_document_defaults(self): assert results_json == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4744,7 +4745,7 @@ def test_execute_multi_output_response_document_defaults(self): assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, } @@ -4796,7 +4797,7 @@ def 
test_execute_multi_output_response_document_mixed(self): assert results.content_type.startswith(ContentType.APP_JSON) assert results_json == { "output_data": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, "output_json": { @@ -4804,7 +4805,7 @@ def test_execute_multi_output_response_document_mixed(self): "mediaType": ContentType.APP_JSON, }, "output_text": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, } @@ -4813,11 +4814,11 @@ def test_execute_multi_output_response_document_mixed(self): assert outputs.json["outputs"] == { "output_data": "test", "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.json", + "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, }, "output_text": { - "href": f"{out_url}/{job_id}/output_text/output.txt", + "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, }, } diff --git a/weaver/datatype.py b/weaver/datatype.py index eba5acd96..e2bc38493 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -38,7 +38,7 @@ from weaver import xml_util from weaver.exceptions import ProcessInstanceError, ServiceParsingError from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode -from weaver.formats import AcceptLanguage, ContentType, repr_json +from weaver.formats import AcceptLanguage, ContentType, OutputFormat, repr_json from weaver.processes.constants import ( CWL_NAMESPACE_WEAVER_ID, CWL_REQUIREMENT_APP_DOCKER, @@ -63,6 +63,7 @@ get_job_log_msg, get_log_date_fmt, get_log_fmt, + get_path_kvp, get_settings, now, request_extra @@ -1363,6 +1364,7 @@ def links(self, container=None, self_link=None): :param self_link: name of a section that represents the current link that will be returned. 
""" settings = get_settings(container) + html_on = settings.get("weaver.wps_restapi_html", True) base_url = get_wps_restapi_base_url(settings) job_url = self._job_url(base_url) # full URL job_path = base_url + sd.job_service.path.format(job_id=self.id) @@ -1371,8 +1373,8 @@ def links(self, container=None, self_link=None): job_links = [ {"href": job_url, "rel": "status", "title": "Job status."}, # OGC {"href": job_url, "rel": "monitor", "title": "Job monitoring location."}, # IANA - {"href": job_path, "rel": "alternate", "title": "Job status generic endpoint."}, # IANA - {"href": job_list, "rel": "collection", "title": "List of submitted jobs."}, # IANA + {"href": get_path_kvp(job_path, f=OutputFormat.JSON), "type": ContentType.APP_JSON, + "rel": "alternate", "title": "Job status generic endpoint."}, # IANA {"href": job_list, "rel": "http://www.opengis.net/def/rel/ogc/1.0/job-list", # OGC "title": "List of submitted jobs."}, {"href": job_exec, "rel": "http://www.opengis.net/def/rel/ogc/1.0/execute", @@ -1380,6 +1382,19 @@ def links(self, container=None, self_link=None): {"href": f"{job_url}/inputs", "rel": "inputs", # unofficial "title": "Submitted job inputs for process execution."} ] + if html_on: + job_links.append({ + "href": get_path_kvp(job_path, f=OutputFormat.HTML), + "rel": "alternate", + "title": "HTML Job Status", + "type": ContentType.TEXT_HTML, + }) + if self_link in ["status", None]: + job_links.extend([ + {"href": job_list, "rel": "collection", "title": "List of submitted jobs."}, # IANA + + ]) + if self.status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: job_status = map_status(self.status) if job_status == Status.SUCCEEDED: @@ -1413,7 +1428,8 @@ def links(self, container=None, self_link=None): job_links.extend([self_link_body, self_link_up]) link_meta = {"type": ContentType.APP_JSON, "hreflang": AcceptLanguage.EN_CA} for link in job_links: - link.update(link_meta) + for meta, parma in link_meta.items(): + link.setdefault(meta, parma) return 
job_links def json(self, container=None): # pylint: disable=W0221,arguments-differ @@ -1484,6 +1500,7 @@ def params(self): "request": self.request, "response": self.response, "subscribers": self.subscribers, + "accept_type": self.accept_type, "accept_language": self.accept_language, } diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index c1d8740dd..b0ae1465d 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -71,6 +71,7 @@ from pywps.inout.inputs import BoundingBoxInput, ComplexInput from weaver.datatype import Job + from weaver.execute import AnyExecuteMode from weaver.processes.convert import OWS_Input_Type, ProcessOWS from weaver.status import StatusType from weaver.typedefs import ( @@ -836,7 +837,7 @@ def submit_job_handler(payload, # type: ProcessExecution # sync not respected, therefore must drop it # since both could be provided as alternative preferences, drop only async with limited subset prefer = get_header("Preference-Applied", headers, pop=True) - _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteMode.ASYNC]) + _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteControlOption.ASYNC]) if async_applied: resp_headers.update(async_applied) @@ -853,7 +854,7 @@ def submit_job_handler(payload, # type: ProcessExecution def validate_job_accept_header(headers, execution_mode): - # type: (AnyHeadersContainer, ExecuteMode) -> Optional[str] + # type: (AnyHeadersContainer, AnyExecuteMode) -> Optional[str] """ Validate that the submitted ``Accept`` header is permitted. 
""" diff --git a/weaver/utils.py b/weaver/utils.py index f3bdba199..f76bd4946 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -97,7 +97,6 @@ from mypy_boto3_s3.client import S3Client - from weaver.execute import AnyExecuteControlOption, AnyExecuteMode from weaver.status import Status from weaver.typedefs import ( AnyCallable, @@ -119,6 +118,7 @@ JSON, KVP, KVP_Item, + Link, Literal, Number, OpenAPISchema, @@ -1291,6 +1291,44 @@ def get_href_headers( return headers +def make_link_header( + href, # type: Union[str, Link] + hreflang=None, # type: Optional[str] + rel=None, # type: Optional[str] + type=None, # type: Optional[str] # noqa + title=None, # type: Optional[str] + charset=None, # type: Optional[str] +): # type: (...) -> str + """ + Creates the HTTP Link (:rfc:`8288`) header value from input parameters or a dictionary representation. + + Parameter names are specifically selected to allow direct unpacking from the dictionary representation. + Otherwise, a dictionary can be passed as the first parameter, allowing other parameters to act as override values. + Alternatively, all parameters can be supplied individually. + + .. note:: + Parameter :paramref:`rel` is optional to allow unpacking with a single parameter, + but its value is required to form a valid ``Link`` header. 
+ """ + if isinstance(href, dict): + rel = rel or href.get("rel") + type = type or href.get("type") # noqa + title = title or href.get("title") + charset = charset or href.get("charset") # noqa + hreflang = hreflang or href.get("hreflang") + href = href["href"] + link = f"<{href}>; rel=\"{rel}\"" + if type: + link += f"; type=\"{type}\"" + if charset: + link += f"; charset=\"{charset}\"" + if title: + link += f"; title=\"{title}\"" + if hreflang: + link += f"; hreflang={hreflang}" + return link + + def get_base_url(url): # type: (str) -> str """ diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 631e1a2e6..470c8477f 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -20,6 +20,7 @@ from pyramid.response import FileResponse from pyramid_celery import celery_app from requests.structures import CaseInsensitiveDict +from webob.headers import ResponseHeaders from weaver.database import get_db from weaver.datatype import Job, Process @@ -50,7 +51,8 @@ get_secure_path, get_settings, get_weaver_url, - is_uuid + is_uuid, + make_link_header, ) from weaver.visibility import Visibility from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location @@ -61,7 +63,7 @@ if TYPE_CHECKING: from typing import Any, Dict, List, Optional, Tuple, Union - from weaver.execute import AnyExecuteResponse + from weaver.execute import AnyExecuteResponse, AnyExecuteTransmissionMode from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( AnyHeadersContainer, @@ -313,8 +315,8 @@ def make_result_link(result_id, result, job_id, settings): url = get_any_value(value, data=False, file=True) # should already include full path if fmt == ContentType.TEXT_PLAIN and not enc: # only if text, otherwise binary content could differ enc = "UTF-8" # default both omit/empty - encoding = f"; charset={enc}" if enc else "" - links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; 
type={typ}{encoding}") + link_header = make_link_header(url, rel=f"{result_id}{suffix}", type=typ, charset=enc) + links.append(link_header) return links @@ -471,6 +473,17 @@ def get_job_return(job, body=None, headers=None): return job.execution_response +def get_job_output_transmission(job, output_id): + # type: (Job, str) -> Optional[AnyExecuteTransmissionMode] + """ + Obtain the requested :term:`Job` output ``transmissionMode``. + """ + outputs = job.outputs or {} + out = outputs.get(output_id) or {} + mode = out.get("transmissionMode") + return mode + + def get_job_results_response( job, # type: Job container, # type: AnySettingsContainer @@ -528,12 +541,18 @@ def get_job_results_response( schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details link_references=is_raw) - headers = CaseInsensitiveDict(headers or {}) - if "Location" in headers: - headers.setdefault("Content-Location", headers.pop("Location")) - headers.setdefault("Content-Location", job.status_url(container)) + headers = ResponseHeaders(headers or {}) + headers.pop("Location", None) + headers.setdefault("Content-Location", job.results_url(container)) + for link in job.links(container, self_link="results"): + link_header = make_link_header(link) + headers.add("Link", link_header) - if not is_raw: + is_accept_multipart = ( + isinstance(job.accept_type, str) and + any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART) + ) + if not is_raw and not is_accept_multipart: try: results_schema = sd.ResultsDocument() results_json = results_schema.deserialize(results) @@ -563,8 +582,8 @@ def get_job_results_response( # Status code 204 for empty body # see: # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref - refs.extend(headers.items()) - return HTTPNoContent(headers=refs) + headers.extend(refs) + return HTTPNoContent(headers=headers) # raw response can be data-only value, link-only or a mix of them if results: @@ -577,9 +596,9 @@ def 
get_job_results_response( if ( len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1) or - (isinstance(job.accept_type, str) and any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART)) + is_accept_multipart ): - return get_job_results_multipart(job, results) + return get_job_results_multipart(job, results, container) # single value only out_data = out_data[0] if isinstance(out_data, list) else out_data @@ -601,8 +620,8 @@ def get_job_results_response( return resp -def get_job_results_multipart(job, results): - # type: (Job, ExecutionResults) -> HTTPOk +def get_job_results_multipart(job, results, container): + # type: (Job, ExecutionResults, AnySettingsContainer) -> HTTPOk """ Generates the :term:`Job` results multipart response from available or requested outputs. @@ -612,34 +631,71 @@ def get_job_results_multipart(job, results): :param job: :param results: Pre-filtered and pre-processed results in a normalized format structure. """ - # FIXME: https://github.com/crim-ca/weaver/issues/376 - # implement multipart, both for multi-output IDs and array-output under same ID - multi = MIMEMultipart() - for res_id, result in results.items(): - key = get_any_value(result, key=True) - val = get_any_value(result) - if key == "href": - typ = result.get("type") or ContentType.APP_OCTET_STREAM - res_headers = get_href_headers(val, download_headers=True, content_headers=True, content_type=typ) - else: - typ = ContentType.TEXT_PLAIN - - get_href_headers() - fmt = get_format() - part = MIMEPart() - part.add_header("Content-Disposition", "inline", filename=f"{res_id}{ext}") - part.set_type(typ) - part.set_charset() - part.set_param() # in ctype - # data - part.set_payload() - multi.attach(part) + settings = get_settings(container) + + def add_result_parts(result_parts): + multi = MIMEMultipart("mixed") + for res_id, result in result_parts.items(): + if isinstance(result, list): + sub_parts = {f"{res_id}.{i}": data for i, data in enumerate(result)} + 
part = add_result_parts(sub_parts) + multi.attach(part) + continue + + key = get_any_value(result, key=True) + val = get_any_value(result) + mode = get_job_output_transmission(job, res_id) + # FIXME: adjust output based on transmissionMode rather than href/value key + if key == "value": + url = None + if mode == ExecuteTransmissionMode.REFERENCE: + url = None # FIXME: write file + else: + mode = ExecuteTransmissionMode.VALUE # in case unspecified, default "auto" + if key == "href": + url = val + if mode == ExecuteTransmissionMode.VALUE: + val = None # FIXME: read file + else: + mode = ExecuteTransmissionMode.REFERENCE # in case unspecified, default "auto" + + # NOTE: work with local files (since we have them), to avoid unnecessary loopback request + # FIXME: Handle S3 output storage. Should multipart response even be allowed in this case? + if key == "href": + typ = result.get("type") or ContentType.APP_OCTET_STREAM + loc = map_wps_output_location(url, settings, exists=True, file_scheme=True, url=False) + res_headers = get_href_headers( + loc, + download_headers=True, + content_headers=True, + content_type=typ, + settings=settings, + ) + res_headers["Content-Location"] = url # rewrite back the original URL + else: + typ = ContentType.TEXT_PLAIN + name = f"{res_id}.txt" + res_headers = get_href_headers( + name, + download_headers=True, + content_headers=True, + content_type=typ, + settings=settings, + ) + + part = MIMEPart() + for hdr_key, hdr_val in res_headers.items(): + part.add_header(hdr_key, hdr_val) + part.set_payload(val) + multi.attach(part) + return multi + res_multi = add_result_parts(results) resp = HTTPOk( detail=f"Multipart Response for {job}", - headers={"Content-Type": multi.get_content_type()}, + headers={"Content-Type": res_multi.get_content_type()}, ) - resp.body = multi.as_bytes() + resp.body = res_multi.as_bytes() return resp From d5d3a21386ca80796d877d22daaaf6b31c7d5916 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 23 Sep 
2024 19:54:25 -0400 Subject: [PATCH 22/75] =?UTF-8?q?[wip]=C2=A0generalize=20job=20result=20va?= =?UTF-8?q?lue/href=20resolution=20for=20single/multi/array=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- weaver/datatype.py | 26 ++++++ weaver/processes/wps_package.py | 2 +- weaver/typedefs.py | 1 + weaver/utils.py | 20 ++++- weaver/wps_restapi/jobs/utils.py | 143 +++++++++++++++++++++---------- 5 files changed, 142 insertions(+), 50 deletions(-) diff --git a/weaver/datatype.py b/weaver/datatype.py index e2bc38493..4f9dc359d 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1352,6 +1352,32 @@ def results_url(self, container=None): # type: (Optional[AnySettingsContainer]) -> str return self.job_url(container=container, extra_path="/results") + def result_path(self, job_id=None, output_id=None, file_name=None): + # type: (Optional[AnyUUID], Optional[str], Optional[str]) -> str + """ + Obtains a *relative* result path, according to requested parameters and the :term:`Job` definition. + + The generated path will automatically apply the relative job context if defined. + + :param job_id: Override ID to employ for the job path. Otherwise, uses the usually job UUID by default. + This should be used for cases where the ID is "not yet" established by the job, or that an alternate + location based on a UUID established by another source must be employed. + :param output_id: + Output ID to refer to in the path. If omitted, the path prefix will stop at the job ID fragment. + :param file_name: + Output file name and extension to apply to the path. If omitted, the path prefix will stop at the output ID. + :return: Resolved *relative* result path. 
+ """ + result_job_id = str(job_id or self.id) + result_job_path = os.path.join(self.context, result_job_id) if self.context else result_job_id + if not output_id: + return result_job_path + result_job_path = os.path.join(result_job_path, output_id) + if not file_name: + return result_job_path + result_job_path = os.path.join(result_job_path, file_name) + return result_job_path + def links(self, container=None, self_link=None): # type: (Optional[AnySettingsContainer], Optional[str]) -> List[Link] """ diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index 4d52acc38..cb1361843 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -2727,7 +2727,7 @@ def make_location_storage(self, storage_type, location_type): ) output_job_id = str(self.response.uuid) - output_prefix = os.path.join(self.job.context, output_job_id) if self.job.context else output_job_id + output_prefix = self.job.result_path(job_id=output_job_id) # pylint: disable=attribute-defined-outside-init # references to nested storage dynamically created if storage_type == STORE_TYPE.S3: storage.prefix = output_prefix diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 4ad1469b6..51d80fb03 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -359,6 +359,7 @@ class CWL_SchemaName(Protocol): AnyRegistryContainer = AnyContainer AnyDatabaseContainer = AnyContainer + AnyData = Union[str, bytes, bytearray] CookiesType = Dict[str, str] HeadersType = Dict[str, str] CookiesTupleType = List[Tuple[str, str]] diff --git a/weaver/utils.py b/weaver/utils.py index f76bd4946..89345babe 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -157,6 +157,7 @@ MetadataResult = TypedDict("MetadataResult", { "Date": str, "Last-Modified": str, + "Content-ID": NotRequired[str], "Content-Type": NotRequired[str], "Content-Length": NotRequired[str], "Content-Location": NotRequired[str], @@ -1191,6 +1192,8 @@ def get_href_headers( content_headers=False, # type: bool 
content_type=None, # type: Optional[str] content_disposition_type="attachment", # type: Literal["attachment", "inline"] + content_location=None, # type: Optional[str] + content_id=None, # type: Optional[str] settings=None, # type: Optional[SettingsType] **option_kwargs, # type: Unpack[Union[SchemeOptions, RequestOptions]] ): # type: (...) -> MetadataResult @@ -1209,8 +1212,16 @@ def get_href_headers( Explicit ``Content-Type`` to provide. Otherwise, use default guessed by file system (often ``application/octet-stream``). If the reference is a directory, this parameter is ignored and ``application/directory`` will be enforced. + Requires that :paramref:`content_headers` is enabled. :param content_disposition_type: - Whether ``inline`` or ``attachment`` should be used, when enabled by :paramref:`download_headers`. + Whether ``inline`` or ``attachment`` should be used. + Requires that :paramref:`content_headers` and :paramref:`download_headers` are enabled. + :param content_location: + Override ``Content-Location`` to include in headers. Otherwise, defaults to the :paramref:`path`. + Requires that :paramref:`location_headers` and :paramref:`content_headers` are enabled in each case. + :param content_id: + Optional ``Content-ID`` to include in the headers. + Requires that :paramref:`content_headers` is enabled. :param settings: Application settings to pass down to relevant utility functions. :return: Headers for the reference. 
""" @@ -1259,8 +1270,13 @@ def get_href_headers( f_size = stat.st_size f_modified = datetime.fromtimestamp(stat.st_mtime) - headers = {"Content-Location": href} if location_headers else {} + headers = {} if content_headers: + content_id = content_id.strip("<>") if isinstance(content_id, str) else "" + if content_id: + headers["Content-ID"] = f"<{content_id}>" + if location_headers: + headers["Content-Location"] = content_location or href c_type, c_enc = guess_file_contents(href) if not f_type: if c_type == ContentType.APP_OCTET_STREAM: # default diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 470c8477f..a4bc48af4 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -19,7 +19,6 @@ ) from pyramid.response import FileResponse from pyramid_celery import celery_app -from requests.structures import CaseInsensitiveDict from webob.headers import ResponseHeaders from weaver.database import get_db @@ -66,6 +65,7 @@ from weaver.execute import AnyExecuteResponse, AnyExecuteTransmissionMode from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( + AnyData, AnyHeadersContainer, AnyRequestType, AnyResponseType, @@ -77,7 +77,9 @@ ExecutionResults, ExecutionResultValue, HeadersTupleType, + HeadersType, JSON, + Path, PyramidRequest, SettingsType ) @@ -473,15 +475,20 @@ def get_job_return(job, body=None, headers=None): return job.execution_response -def get_job_output_transmission(job, output_id): - # type: (Job, str) -> Optional[AnyExecuteTransmissionMode] +def get_job_output_transmission(job, output_id, is_reference): + # type: (Job, str, bool) -> AnyExecuteTransmissionMode """ Obtain the requested :term:`Job` output ``transmissionMode``. 
""" outputs = job.outputs or {} out = outputs.get(output_id) or {} mode = out.get("transmissionMode") - return mode + # because mode can be omitted, resolve their default explicitly + if not mode and is_reference: + return ExecuteTransmissionMode.REFERENCE + if not mode and not is_reference: + return ExecuteTransmissionMode.VALUE + return cast("AnyExecuteTransmissionMode", mode) def get_job_results_response( @@ -620,6 +627,80 @@ def get_job_results_response( return resp +def generate_or_resolve_result( + job, # type: Job + result, # type: ExecutionResultObject + result_id, # type: str + output_id, # type: str + output_mode, # type: ExecuteTransmissionMode + settings, # type: SettingsType +): # type: (...) -> Tuple[HeadersType, AnyData] + """ + Obtains the local file path and the corresponding :term:`URL` reference for a given result, generating it as needed. + + :param job: Job with results details. + :param result: The specific output value or reference (could be an item index within an array of a given output). + :param result_id: Specific identifier of the result, including any array index as applicable. + :param output_id: Generic identifier of the output containing the result. + :param output_mode: Desired output transmission mode. + :param settings: Application settings to resolve locations. + :return: Resolved locations. + """ + key = get_any_value(result, key=True) + val = get_any_value(result) + cid = f"{result_id}@{job.id}" + url = None + loc = None + typ = None + res_data = None + + # NOTE: + # work with local files (since we have them), to avoid unnecessary loopback request + # then, rewrite the locations after generating their headers to obtain the final result URL + + # FIXME: Handle S3 output storage. Should multipart response even be allowed in this case? 
+ + if key == "href": + url = val + typ = result.get("type") or ContentType.APP_OCTET_STREAM + loc = map_wps_output_location(val, settings, exists=True, file_scheme=True, url=False) + + if not url: + out_dir = get_wps_output_dir(settings) + out_name = f"{result_id}.txt" + job_path = job.result_path(output_id=output_id, file_name=out_name) + loc = os.path.join(out_dir, job_path) + url = map_wps_output_location(loc, settings, exists=True, url=True) + + if key == "value": + res_data = val + typ = ContentType.TEXT_PLAIN + + if key == "value" and output_mode == ExecuteTransmissionMode.REFERENCE: + if not os.path.isfile(loc): + os.makedirs(os.path.dirname(loc), exist_ok=True) + with open(loc, mode="w", encoding="utf-8") as out_file: + out_file.write(val) + + if key == "href" and output_mode == ExecuteTransmissionMode.VALUE: + with open(loc, mode="rb") as out_file: + res_data = out_file.read() + + if output_mode == ExecuteTransmissionMode.REFERENCE: + res_data = "" + + res_headers = get_href_headers( + loc, + download_headers=True, + content_headers=True, + content_type=typ, + content_id=cid, + content_location=url, # rewrite back the original URL + settings=settings, + ) + return res_headers, res_data + + def get_job_results_multipart(job, results, container): # type: (Job, ExecutionResults, AnySettingsContainer) -> HTTPOk """ @@ -630,67 +711,35 @@ def get_job_results_multipart(job, results, container): :param job: :param results: Pre-filtered and pre-processed results in a normalized format structure. + :param container: Application settings to resolve locations. 
""" settings = get_settings(container) def add_result_parts(result_parts): + # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MIMEMultipart + multi = MIMEMultipart("mixed") - for res_id, result in result_parts.items(): + for res_id, out_id, result in result_parts: if isinstance(result, list): - sub_parts = {f"{res_id}.{i}": data for i, data in enumerate(result)} + sub_parts = [(f"{out_id}.{i}", out_id, data) for i, data in enumerate(result)] part = add_result_parts(sub_parts) multi.attach(part) continue key = get_any_value(result, key=True) - val = get_any_value(result) - mode = get_job_output_transmission(job, res_id) - # FIXME: adjust output based on transmissionMode rather than href/value key - if key == "value": - url = None - if mode == ExecuteTransmissionMode.REFERENCE: - url = None # FIXME: write file - else: - mode = ExecuteTransmissionMode.VALUE # in case unspecified, default "auto" - if key == "href": - url = val - if mode == ExecuteTransmissionMode.VALUE: - val = None # FIXME: read file - else: - mode = ExecuteTransmissionMode.REFERENCE # in case unspecified, default "auto" - - # NOTE: work with local files (since we have them), to avoid unnecessary loopback request - # FIXME: Handle S3 output storage. Should multipart response even be allowed in this case? 
- if key == "href": - typ = result.get("type") or ContentType.APP_OCTET_STREAM - loc = map_wps_output_location(url, settings, exists=True, file_scheme=True, url=False) - res_headers = get_href_headers( - loc, - download_headers=True, - content_headers=True, - content_type=typ, - settings=settings, - ) - res_headers["Content-Location"] = url # rewrite back the original URL - else: - typ = ContentType.TEXT_PLAIN - name = f"{res_id}.txt" - res_headers = get_href_headers( - name, - download_headers=True, - content_headers=True, - content_type=typ, - settings=settings, - ) + mode = get_job_output_transmission(job, out_id, is_reference=(key == "href")) + res_headers, res_data = generate_or_resolve_result(job, result, res_id, out_id, mode, settings) part = MIMEPart() for hdr_key, hdr_val in res_headers.items(): part.add_header(hdr_key, hdr_val) - part.set_payload(val) + if res_data: + part.set_payload(res_data) multi.attach(part) return multi - res_multi = add_result_parts(results) + results_parts = [(_res_id, _res_id, _res_val) for _res_id, _res_val in results.items()] + res_multi = add_result_parts(results_parts) resp = HTTPOk( detail=f"Multipart Response for {job}", headers={"Content-Type": res_multi.get_content_type()}, From bbafc3966d3d4085c92a72b7f4c5f1bb5bbee298 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 24 Sep 2024 03:25:29 -0400 Subject: [PATCH 23/75] [wip] fix invalid no-content multipart response content creation using forked https://github.com/crim-ca/requests-toolbelt/tree/patch-multipart --- requirements.txt | 1 + weaver/typedefs.py | 2 + weaver/wps_restapi/jobs/utils.py | 108 ++++++++++++++++++++++--------- 3 files changed, 80 insertions(+), 31 deletions(-) diff --git a/requirements.txt b/requirements.txt index ddf4db203..534843fa4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -107,6 +107,7 @@ pyyaml>=5.2 rdflib>=5 # pyup: ignore requests>=2.32 requests_file +requests_toolbelt @ 
git+https://github.com/crim-ca/requests-toolbelt.git@patch-multipart ruamel.yaml>=0.16 # force use of later mistune (https://github.com/common-workflow-language/schema_salad/pull/619#issuecomment-1346025607) # employed by cwltool -> schema-salad -> mistune diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 51d80fb03..58dc1c66f 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -1,6 +1,7 @@ from typing import TYPE_CHECKING # pragma: no cover if TYPE_CHECKING: + import io import os import sys import uuid @@ -360,6 +361,7 @@ class CWL_SchemaName(Protocol): AnyDatabaseContainer = AnyContainer AnyData = Union[str, bytes, bytearray] + AnyDataStream = Union[AnyData, io.IOBase] CookiesType = Dict[str, str] HeadersType = Dict[str, str] CookiesTupleType = List[Tuple[str, str]] diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index a4bc48af4..012fb9e7f 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -1,9 +1,12 @@ +import io + import math import os import shutil from copy import deepcopy from email.message import MIMEPart from email.mime.multipart import MIMEMultipart +from email.policy import HTTP as PolicyHTTP from typing import TYPE_CHECKING, cast import colander @@ -19,6 +22,7 @@ ) from pyramid.response import FileResponse from pyramid_celery import celery_app +from requests_toolbelt.multipart.encoder import MultipartEncoder from webob.headers import ResponseHeaders from weaver.database import get_db @@ -60,12 +64,12 @@ from weaver.wps_restapi.providers.utils import forbid_local_only if TYPE_CHECKING: - from typing import Any, Dict, List, Optional, Tuple, Union + from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union from weaver.execute import AnyExecuteResponse, AnyExecuteTransmissionMode from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( - AnyData, + AnyDataStream, AnyHeadersContainer, AnyRequestType, 
AnyResponseType, @@ -84,6 +88,17 @@ SettingsType ) + MultiPartFieldsParamsType = Union[ + AnyDataStream, + # filename, data/io + Tuple[Optional[str], AnyDataStream], + # filename, data/io, content-type + Tuple[Optional[str], AnyDataStream, Optional[str], HeadersType], + # filename, data/io, content-type, headers + Tuple[Optional[str], AnyDataStream, str, HeadersType], + ] + MultiPartFieldsType = Sequence[Tuple[str, MultiPartFieldsParamsType]] + LOGGER = get_task_logger(__name__) @@ -605,7 +620,7 @@ def get_job_results_response( (isinstance(out_data, list) and len(out_data) > 1) or is_accept_multipart ): - return get_job_results_multipart(job, results, container) + return get_job_results_multipart(job, results, headers=headers, container=container) # single value only out_data = out_data[0] if isinstance(out_data, list) else out_data @@ -632,9 +647,9 @@ def generate_or_resolve_result( result, # type: ExecutionResultObject result_id, # type: str output_id, # type: str - output_mode, # type: ExecuteTransmissionMode + output_mode, # type: AnyExecuteTransmissionMode settings, # type: SettingsType -): # type: (...) -> Tuple[HeadersType, AnyData] +): # type: (...) -> Tuple[HeadersType, Optional[AnyDataStream]] """ Obtains the local file path and the corresponding :term:`URL` reference for a given result, generating it as needed. 
@@ -673,7 +688,8 @@ def generate_or_resolve_result( url = map_wps_output_location(loc, settings, exists=True, url=True) if key == "value": - res_data = val + res_data = io.StringIO() + res_data.read(val) typ = ContentType.TEXT_PLAIN if key == "value" and output_mode == ExecuteTransmissionMode.REFERENCE: @@ -683,11 +699,9 @@ def generate_or_resolve_result( out_file.write(val) if key == "href" and output_mode == ExecuteTransmissionMode.VALUE: - with open(loc, mode="rb") as out_file: - res_data = out_file.read() - - if output_mode == ExecuteTransmissionMode.REFERENCE: - res_data = "" + res_data = io.FileIO(loc, mode="rb") + # with open(loc, mode="rb") as out_file: + # res_data = out_file.read() res_headers = get_href_headers( loc, @@ -698,11 +712,15 @@ def generate_or_resolve_result( content_location=url, # rewrite back the original URL settings=settings, ) + if output_mode == ExecuteTransmissionMode.REFERENCE: + res_data = None + res_headers["Content-Length"] = "0" + return res_headers, res_data -def get_job_results_multipart(job, results, container): - # type: (Job, ExecutionResults, AnySettingsContainer) -> HTTPOk +def get_job_results_multipart(job, results, headers, container): + # type: (Job, ExecutionResults, AnyHeadersContainer, AnySettingsContainer) -> HTTPOk """ Generates the :term:`Job` results multipart response from available or requested outputs. @@ -711,40 +729,68 @@ def get_job_results_multipart(job, results, container): :param job: :param results: Pre-filtered and pre-processed results in a normalized format structure. + :param headers: Additional headers to include in the response. :param container: Application settings to resolve locations. 
""" settings = get_settings(container) + # class AnyMultipartEncoder(MultipartEncoder): + # def __init__(self, fields, content_type=ContentType.MULTIPART_MIXED, **kwargs): + # # type: (MultiPartFieldsType, str, **str) -> None + # super().__init__(fields, **kwargs) + # self._content_type = content_type + # + # @property + # def content_type(self): + # return f"{self._content_type}; boundary=\"{self.boundary_value}\"" + def add_result_parts(result_parts): - # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MIMEMultipart + # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MultiPartFieldsType + #### type: (List[Tuple[str, str, ExecutionResultObject]]) -> MIMEMultipart - multi = MIMEMultipart("mixed") + ##multi = AnyMultipartEncoder("mixed", policy=PolicyHTTP) for res_id, out_id, result in result_parts: if isinstance(result, list): sub_parts = [(f"{out_id}.{i}", out_id, data) for i, data in enumerate(result)] - part = add_result_parts(sub_parts) - multi.attach(part) - continue + sub_parts = add_result_parts(sub_parts) + sub_multi = MultipartEncoder(sub_parts) + sub_out_url = job.result_path(output_id=out_id) + sub_headers = { + "Content-Type": sub_multi.content_type, + "Content-ID": f"<{out_id}@{job.id}>", + "Content-Location": sub_out_url + } + yield out_id, (None, sub_multi, None, sub_headers) + ###part = add_result_parts(sub_parts) + ###multi.attach(part) + ##continue key = get_any_value(result, key=True) mode = get_job_output_transmission(job, out_id, is_reference=(key == "href")) res_headers, res_data = generate_or_resolve_result(job, result, res_id, out_id, mode, settings) + yield out_id, (None, res_data, None, res_headers) - part = MIMEPart() - for hdr_key, hdr_val in res_headers.items(): - part.add_header(hdr_key, hdr_val) - if res_data: - part.set_payload(res_data) - multi.attach(part) - return multi + # part = MIMEPart(policy=PolicyHTTP) + # for hdr_key, hdr_val in res_headers.items(): + # if hdr_val: + # part.add_header(hdr_key, hdr_val) + # 
if res_data: + # part.set_payload(res_data) + ###multi.attach(part) + ##return multi results_parts = [(_res_id, _res_id, _res_val) for _res_id, _res_val in results.items()] - res_multi = add_result_parts(results_parts) - resp = HTTPOk( - detail=f"Multipart Response for {job}", - headers={"Content-Type": res_multi.get_content_type()}, - ) - resp.body = res_multi.as_bytes() + results_parts = list(add_result_parts(results_parts)) + #res_multi = AnyMultipartEncoder(results_parts) + res_multi = MultipartEncoder(results_parts) + ##resp_ctype = f"{res_multi.get_content_type()}; boundary=\"{res_multi.get_boundary()}\"" + resp_headers = headers or {} + ##resp_headers.update({"Content-Type": resp_ctype}) + resp_headers.update({"Content-Type": res_multi.content_type}) + resp = HTTPOk(detail=f"Multipart Response for {job}", headers=resp_headers) + # drop generator contents that includes its own headers in the body, only keep nested parts + ###resp.body = res_multi.as_bytes(policy=PolicyHTTP).split(res_multi.policy.linesep.encode(), 3)[-1] + resp.body = res_multi.read() return resp From 1d87f5832b7f3bdd42e43cb780b1a22905d7bc46 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 24 Sep 2024 18:56:30 -0400 Subject: [PATCH 24/75] use temmporary requests-toolbelt crim-ca/patch-multipart branch to fix no-content and nested multipart (relates to https://github.com/requests/toolbelt/pull/380) --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 534843fa4..f82af75c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -107,6 +107,7 @@ pyyaml>=5.2 rdflib>=5 # pyup: ignore requests>=2.32 requests_file +# FIXME: https://github.com/requests/toolbelt/pull/380 requests_toolbelt @ git+https://github.com/crim-ca/requests-toolbelt.git@patch-multipart ruamel.yaml>=0.16 # force use of later mistune (https://github.com/common-workflow-language/schema_salad/pull/619#issuecomment-1346025607) From 
fc75d9305af76ecf63e304d9402b58d0085ecc3d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 24 Sep 2024 23:45:51 -0400 Subject: [PATCH 25/75] fix multipart generation - WIP tests results checks --- tests/functional/test_wps_package.py | 113 +++++++++++++++------- weaver/processes/execution.py | 6 +- weaver/utils.py | 88 +++++++++++------ weaver/wps_restapi/jobs/utils.py | 110 +++++++++++---------- weaver/wps_restapi/swagger_definitions.py | 61 +++++++++--- 5 files changed, 248 insertions(+), 130 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 6c8d43ebb..d047cec16 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -7,16 +7,16 @@ .. seealso:: - :mod:`tests.processes.wps_package`. """ -import inspect import contextlib import copy +import inspect import json import logging import os +import re import shutil import tempfile -from inspect import cleandoc from typing import TYPE_CHECKING import boto3 @@ -1957,7 +1957,7 @@ def test_execute_job_with_array_input(self): "listing": [ { "entryname": "script.py", - "entry": cleandoc(""" + "entry": inspect.cleandoc(""" import json import os input = $(inputs) @@ -2129,7 +2129,7 @@ def test_execute_job_with_inline_input_values(self): "listing": [ { "entryname": "script.py", - "entry": cleandoc(""" + "entry": inspect.cleandoc(""" import json import os import ast @@ -3558,6 +3558,16 @@ def remove_result_format(results): result.pop("format", None) return results + @staticmethod + def remove_result_multipart_variable(results): + # type: (str) -> str + """ + Removes any variable headers from the multipart contents to simplify test comparison. 
+ """ + results = re.sub(r"Date: .*\r\n", "", results) + results = re.sub(r"Last-Modified: .*\r\n", "", results) + return results.strip() + def test_execute_single_output_prefer_header_return_representation_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3596,7 +3606,7 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { - "output_data": "test", + "output_data": {"value": "test"}, } def test_execute_single_output_prefer_header_return_representation_complex(self): @@ -3968,7 +3978,7 @@ def test_execute_single_output_multipart_accept_data(self): # validate the results based on original execution request results = resp assert ContentType.MULTIPART_MIXED in results.content_type - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} @@ -3977,8 +3987,9 @@ def test_execute_single_output_multipart_accept_data(self): {output_json} --{boundary}-- - """) - assert results.text == results_body + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -4040,15 +4051,17 @@ def test_execute_single_output_multipart_accept_link(self): # validate the results based on original execution request results = resp assert ContentType.MULTIPART_MIXED in results.content_type - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = 
parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: + Content-Length: 0 Content-Location: {out_url}/{job_id}/output_json/result.json --{boundary}-- - """) - assert results.text == results_body + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -4109,18 +4122,20 @@ def test_execute_single_output_multipart_accept_alt_format(self): # validate the results based on original execution request results = resp assert ContentType.MULTIPART_MIXED in results.content_type - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json_as_yaml = yaml.safe_dump({"data": "test"}) results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.APP_YAML} Content-ID: + Content-Length: 12 {output_json_as_yaml} --{boundary}-- - """) + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) - assert results.text == results_body + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -4192,21 +4207,24 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] 
results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.TEXT_PLAIN} Content-ID: + Content-Length: 4 test --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: + Content-Length: 0 Content-Location: {out_url}/{job_id}/output_json/result.json --{boundary}-- - """) + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) - assert results.text == results_body + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -4303,9 +4321,9 @@ def test_execute_multi_output_prefer_header_return_representation(self): self.deploy_process(body, process_id=p_id) exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async", + "Content-Type": ContentType.APP_JSON, } - exec_headers.update(self.json_headers) exec_content = { "inputs": { "message": "test" @@ -4334,23 +4352,29 @@ def test_execute_multi_output_prefer_header_return_representation(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} + Content-Disposition: attachment; filename="output_data.txt" name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: + Content-Length: 4 test --{boundary} + Content-Disposition: attachment; filename="result.json" name="output_json" Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: + 
Content-Length: 16 {output_json} --{boundary}-- - """) + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) - assert results.text == results_body + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -4397,21 +4421,23 @@ def test_execute_multi_output_response_raw_value(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.TEXT_PLAIN} Content-ID: + Content-Length: 4 test --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: + Content-Length: 16 {output_json} --{boundary}-- - """) + """).replace("\n", "\r\n") assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4460,20 +4486,23 @@ def test_execute_multi_output_response_raw_reference(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.TEXT_PLAIN} Content-ID: + Content-Length: 0 Content-Location: {out_url}/{job_id}/output_data/result.txt --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: + Content-Length: 0 Content-Location: 
{out_url}/{job_id}/output_json/result.json --{boundary}-- - """) + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) - assert results.text == results_body + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -4521,31 +4550,37 @@ def test_execute_multi_output_response_raw_mixed(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - boundary = parse_kvp(results.content_type)["boundary"][0] + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.TEXT_PLAIN} Content-ID: + Content-Length: 4 test --{boundary} Content-Type: {ContentType.TEXT_PLAIN} Content-ID: + Content-Length: 0 Content-Location: {out_url}/{job_id}/output_text/result.txt --{boundary} Content-Type: {ContentType.APP_JSON} Content-ID: + Content-Length: 16 {output_json} --{boundary}-- - """) + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) - assert results.text == results_body + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_text": { "href": f"{out_url}/{job_id}/output_text/result.txt", "type": ContentType.TEXT_PLAIN, @@ -4608,7 +4643,9 @@ def 
test_execute_multi_output_prefer_header_return_minimal_defaults(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4680,7 +4717,9 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4743,7 +4782,9 @@ def test_execute_multi_output_response_document_defaults(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4812,7 +4853,9 @@ def test_execute_multi_output_response_document_mixed(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index b0ae1465d..3cdb115f7 100644 --- a/weaver/processes/execution.py +++ 
b/weaver/processes/execution.py @@ -58,7 +58,7 @@ load_pywps_config ) from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.jobs.utils import get_job_results_response, get_job_submission_response +from weaver.wps_restapi.jobs.utils import get_job_results_response, get_job_submission_response, get_job_return from weaver.wps_restapi.processes.utils import resolve_process_tag LOGGER = logging.getLogger(__name__) @@ -794,9 +794,9 @@ def submit_job_handler(payload, # type: ProcessExecution # Prefer header not resolved with a valid value should still resume without error is_execute_async = mode != ExecuteMode.SYNC accept_type = validate_job_accept_header(headers, mode) + exec_resp = get_job_return(job=None, body=json_body, headers=headers) # job 'none' since still doing 1st parsing get_header("prefer", headers, pop=True) # don't care about value, just ensure removed with any header container - exec_resp = json_body.get("response") subscribers = map_job_subscribers(json_body, settings) job_inputs = json_body.get("inputs") job_outputs = json_body.get("outputs") @@ -867,6 +867,8 @@ def validate_job_accept_header(headers, execution_mode): # anything always allowed in sync, since results returned directly if execution_mode == ExecuteMode.SYNC: return accept + if ContentType.ANY in accept: + return raise HTTPNotAcceptable( json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ "type": "NotAcceptable", diff --git a/weaver/utils.py b/weaver/utils.py index 89345babe..1764a42f0 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -33,6 +33,7 @@ from beaker.container import MemoryNamespaceManager from beaker.exceptions import BeakerException from botocore.config import Config as S3Config +from botocore.exceptions import ClientError, HTTPClientError from bs4 import BeautifulSoup from celery.app import Celery from mypy_boto3_s3.literals import RegionName @@ -1193,7 +1194,9 @@ def get_href_headers( content_type=None, # type: Optional[str] 
content_disposition_type="attachment", # type: Literal["attachment", "inline"] content_location=None, # type: Optional[str] + content_name=None, # type: Optional[str] content_id=None, # type: Optional[str] + missing_ok=False, # type: bool settings=None, # type: Optional[SettingsType] **option_kwargs, # type: Unpack[Union[SchemeOptions, RequestOptions]] ): # type: (...) -> MetadataResult @@ -1219,9 +1222,19 @@ def get_href_headers( :param content_location: Override ``Content-Location`` to include in headers. Otherwise, defaults to the :paramref:`path`. Requires that :paramref:`location_headers` and :paramref:`content_headers` are enabled in each case. + :param content_name: + Optional ``name`` parameter to assign in the ``Content-Disposition`` header. + Requires that :paramref:`content_headers` and :paramref:`download_headers` are enabled. :param content_id: Optional ``Content-ID`` to include in the headers. Requires that :paramref:`content_headers` is enabled. + This should be a uniquely identifiable reference *across the server* (not just within a specific response), + which can be used for cross-referencing by ``{cid:<>}`` within and between multipart document contents. + For a generic ID or field name, employ :paramref:`content_name` instead. + :param missing_ok: + If the referenced resource does not exist (locally or remotely as applicable), and that content information + to describe it cannot be retrieved, either raise an error (default) or resume with the minimal information + details that could be resolved. :param settings: Application settings to pass down to relevant utility functions. :return: Headers for the reference. 
""" @@ -1229,6 +1242,9 @@ def get_href_headers( if not any(href.startswith(proto) for proto in ["file", "http", "https", "s3"]): href = f"file://{os.path.abspath(path)}" f_enc = None + f_size = None + f_type = None + f_modified = None # handle directory if path.endswith("/"): @@ -1245,30 +1261,39 @@ def get_href_headers( options["http"].update(**configs) if path.startswith("s3://") or path.startswith("https://s3."): - s3_params = resolve_s3_http_options(**options["http"], **kwargs) - s3_region = options["s3"].pop("region_name", None) - s3_client = boto3.client("s3", region_name=s3_region, **s3_params) # type: S3Client - s3_bucket, file_key = path[5:].split("/", 1) - s3_file = s3_client.head_object(Bucket=s3_bucket, Key=file_key) - f_type = content_type or s3_file["ResponseMetadata"]["HTTPHeaders"]["ContentType"] - f_size = s3_file["ResponseMetadata"]["HTTPHeaders"]["Size"] - f_modified = s3_file["ResponseMetadata"]["HTTPHeaders"]["LastModified"] + try: + s3_params = resolve_s3_http_options(**options["http"], **kwargs) + s3_region = options["s3"].pop("region_name", None) + s3_client = boto3.client("s3", region_name=s3_region, **s3_params) # type: S3Client + s3_bucket, file_key = path[5:].split("/", 1) + s3_file = s3_client.head_object(Bucket=s3_bucket, Key=file_key) + f_type = content_type or s3_file["ResponseMetadata"]["HTTPHeaders"]["ContentType"] + f_size = s3_file["ResponseMetadata"]["HTTPHeaders"]["Size"] + f_modified = s3_file["ResponseMetadata"]["HTTPHeaders"]["LastModified"] + except (ClientError, HTTPClientError): + if not missing_ok: + raise elif path.startswith("http://") or path.startswith("https://"): resp = request_extra("HEAD", href, **options["http"]) - if not resp.status_code == 200: + if resp.status_code != 200 and not missing_ok: raise ValueError(f"Could not obtain file reference metadata from [{href}]") - f_modified = resp.last_modified - f_type = content_type or resp.content_type - f_size = resp.content_length - f_enc = resp.content_encoding + 
if resp.status_code == 200: + f_modified = resp.last_modified + f_type = content_type or resp.content_type + f_size = resp.content_length + f_enc = resp.content_encoding else: - path = path.split("file://", 1)[-1] - stat = os.stat(path) - f_type = content_type - f_size = stat.st_size - f_modified = datetime.fromtimestamp(stat.st_mtime) + try: + path = path.split("file://", 1)[-1] + stat = os.stat(path) + f_type = content_type + f_size = stat.st_size + f_modified = datetime.fromtimestamp(stat.st_mtime) + except OSError: + if not missing_ok: + raise headers = {} if content_headers: @@ -1278,32 +1303,33 @@ def get_href_headers( if location_headers: headers["Content-Location"] = content_location or href c_type, c_enc = guess_file_contents(href) - if not f_type: + f_type = f_type or content_type # in case of error, all above failed, use provided content-type if any + if not f_type: # last resort, guess from file path if c_type == ContentType.APP_OCTET_STREAM: # default f_ext = os.path.splitext(path)[-1] f_type = get_content_type(f_ext, charset="UTF-8", default=ContentType.APP_OCTET_STREAM) else: f_type = c_type - if not f_enc: - f_enc = c_enc + f_enc = f_enc or c_enc or "" headers.update({ "Content-Type": f_type, - "Content-Encoding": f_enc or "", - "Content-Length": str(f_size), + "Content-Encoding": f_enc, }) + if f_size is not None: + headers["Content-Length"] = str(f_size) if download_headers: if os.path.splitext(path)[-1] in ["", "."]: f_ext = get_extension(f_type, dot=True) path = f"{path}{f_ext}" - headers.update({ - "Content-Disposition": f"{content_disposition_type}; filename=\"{os.path.basename(path)}\"", - }) + content_disposition_params = f"filename=\"{os.path.basename(path)}\"" + if content_name: + content_disposition_params += f"; name=\"{content_name}\"" + headers["Content-Disposition"] = f"{content_disposition_type}; {content_disposition_params}" f_current = get_file_header_datetime(now()) - f_modified = get_file_header_datetime(f_modified) - 
headers.update({ - "Date": f_current, - "Last-Modified": f_modified, - }) + headers["Date"] = f_current + if f_modified: + f_modified = get_file_header_datetime(f_modified) + headers["Last-Modified"] = f_modified return headers diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 012fb9e7f..fd39a8ece 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -4,9 +4,6 @@ import os import shutil from copy import deepcopy -from email.message import MIMEPart -from email.mime.multipart import MIMEMultipart -from email.policy import HTTP as PolicyHTTP from typing import TYPE_CHECKING, cast import colander @@ -64,7 +61,7 @@ from weaver.wps_restapi.providers.utils import forbid_local_only if TYPE_CHECKING: - from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union + from typing import Any, Dict, List, Optional, Sequence, Tuple, Union from weaver.execute import AnyExecuteResponse, AnyExecuteTransmissionMode from weaver.processes.constants import JobInputsOutputsSchemaType @@ -83,7 +80,6 @@ HeadersTupleType, HeadersType, JSON, - Path, PyramidRequest, SettingsType ) @@ -93,9 +89,9 @@ # filename, data/io Tuple[Optional[str], AnyDataStream], # filename, data/io, content-type - Tuple[Optional[str], AnyDataStream, Optional[str], HeadersType], + Tuple[Optional[str], AnyDataStream, Optional[str]], # filename, data/io, content-type, headers - Tuple[Optional[str], AnyDataStream, str, HeadersType], + Tuple[Optional[str], AnyDataStream, Optional[str], HeadersType], ] MultiPartFieldsType = Sequence[Tuple[str, MultiPartFieldsParamsType]] @@ -471,10 +467,14 @@ def get_results( # pylint: disable=R1260 return outputs, headers -def get_job_return(job, body=None, headers=None): - # type: (Job, Optional[JSON], Optional[AnyHeadersContainer]) -> AnyExecuteResponse +def get_job_return(job=None, body=None, headers=None): + # type: (Optional[Job], Optional[JSON], Optional[AnyHeadersContainer]) -> 
AnyExecuteResponse """ Obtain the :term:`Job` result representation based on the resolution order of preferences and request parameters. + + Body and header parameters are considered first, in case they provide 'overrides' for the active request. + Then, if the :paramref:`job` was already parsed from the original request, and contains pre-resolved return, + this format is employed. When doing the initial parsing, ``job=None`` MUST be used. """ body = body or {} resp = ExecuteResponse.get(body.get("response")) @@ -487,6 +487,8 @@ def get_job_return(job, body=None, headers=None): if pref == ExecuteReturnPreference.REPRESENTATION: return ExecuteResponse.RAW + if not job: + return ExecuteResponse.DOCUMENT return job.execution_response @@ -595,6 +597,11 @@ def get_job_results_response( "value": repr_json(exc.value), }) ) + + # simplify data literals if qualified value representation is not needed + # use deserialized contents such that only the applicable fields remain + results_json = get_job_results_simplified(results_json) + # note: # Cannot add "links" field in response body because variable Output ID keys are directly at the root # Possible conflict with an output that would be named "links". @@ -642,6 +649,28 @@ def get_job_results_response( return resp +def get_job_results_simplified(results): + # type: (ExecutionResults) -> ExecutionResults + """ + Removes nested literal value definitions if qualified value representation is not needed. + + Qualified value representation is not needed if no other field than ``value`` is provided with the literal data. + The simplification is applied for both literals on their own and nested array of literals. 
+ """ + out_results = {} + for res_id, res_val in results.items(): + if isinstance(res_val, dict) and list(res_val) == ["value"]: + out_results[res_id] = res_val["value"] + elif isinstance(res_val, list): + out_results[res_id] = [ + item["value"] if isinstance(item, dict) and list(item) == ["value"] else item + for item in res_val + ] + else: + out_results[res_id] = res_val + return out_results + + def generate_or_resolve_result( job, # type: Job result, # type: ExecutionResultObject @@ -668,6 +697,7 @@ def generate_or_resolve_result( loc = None typ = None res_data = None + c_length = None # NOTE: # work with local files (since we have them), to avoid unnecessary loopback request @@ -678,18 +708,18 @@ def generate_or_resolve_result( if key == "href": url = val typ = result.get("type") or ContentType.APP_OCTET_STREAM - loc = map_wps_output_location(val, settings, exists=True, file_scheme=True, url=False) + loc = map_wps_output_location(val, settings, exists=True, url=False) if not url: out_dir = get_wps_output_dir(settings) out_name = f"{result_id}.txt" job_path = job.result_path(output_id=output_id, file_name=out_name) loc = os.path.join(out_dir, job_path) - url = map_wps_output_location(loc, settings, exists=True, url=True) + url = map_wps_output_location(loc, settings, exists=False, url=True) if key == "value": res_data = io.StringIO() - res_data.read(val) + c_length = res_data.write(val) typ = ContentType.TEXT_PLAIN if key == "value" and output_mode == ExecuteTransmissionMode.REFERENCE: @@ -700,22 +730,25 @@ def generate_or_resolve_result( if key == "href" and output_mode == ExecuteTransmissionMode.VALUE: res_data = io.FileIO(loc, mode="rb") - # with open(loc, mode="rb") as out_file: - # res_data = out_file.read() res_headers = get_href_headers( loc, download_headers=True, + missing_ok=True, # only basic details if file does not exist content_headers=True, content_type=typ, content_id=cid, - content_location=url, # rewrite back the original URL + 
content_name=result_id, + content_location=url, # rewrite back the original URL settings=settings, ) + if output_mode == ExecuteTransmissionMode.VALUE and not res_headers.get("Content-Length") and c_length is not None: + res_headers["Content-Length"] = str(c_length) if output_mode == ExecuteTransmissionMode.REFERENCE: res_data = None res_headers["Content-Length"] = "0" - + if not os.path.exists(loc): + res_headers.pop("Content-Location", None) return res_headers, res_data @@ -725,7 +758,8 @@ def get_job_results_multipart(job, results, headers, container): Generates the :term:`Job` results multipart response from available or requested outputs. .. seealso:: - Function :func:`get_results` should be used to avoid re-processing all output format combinations. + - Function :func:`get_results` should be used to avoid re-processing all output format combinations. + - Details of ``multipart`` (:rfc:`2046#section-5.1`) :term:`Media-Type` family. :param job: :param results: Pre-filtered and pre-processed results in a normalized format structure. 
@@ -734,62 +768,36 @@ def get_job_results_multipart(job, results, headers, container): """ settings = get_settings(container) - # class AnyMultipartEncoder(MultipartEncoder): - # def __init__(self, fields, content_type=ContentType.MULTIPART_MIXED, **kwargs): - # # type: (MultiPartFieldsType, str, **str) -> None - # super().__init__(fields, **kwargs) - # self._content_type = content_type - # - # @property - # def content_type(self): - # return f"{self._content_type}; boundary=\"{self.boundary_value}\"" - def add_result_parts(result_parts): # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MultiPartFieldsType - #### type: (List[Tuple[str, str, ExecutionResultObject]]) -> MIMEMultipart - - ##multi = AnyMultipartEncoder("mixed", policy=PolicyHTTP) for res_id, out_id, result in result_parts: if isinstance(result, list): sub_parts = [(f"{out_id}.{i}", out_id, data) for i, data in enumerate(result)] sub_parts = add_result_parts(sub_parts) - sub_multi = MultipartEncoder(sub_parts) + sub_multi = MultipartEncoder(sub_parts, content_type=ContentType.MULTIPART_MIXED) sub_out_url = job.result_path(output_id=out_id) sub_headers = { "Content-Type": sub_multi.content_type, "Content-ID": f"<{out_id}@{job.id}>", - "Content-Location": sub_out_url + "Content-Location": sub_out_url, + "Content-Disposition": f"attachment; name=\"{out_id}\"", } yield out_id, (None, sub_multi, None, sub_headers) - ###part = add_result_parts(sub_parts) - ###multi.attach(part) - ##continue key = get_any_value(result, key=True) mode = get_job_output_transmission(job, out_id, is_reference=(key == "href")) res_headers, res_data = generate_or_resolve_result(job, result, res_id, out_id, mode, settings) - yield out_id, (None, res_data, None, res_headers) - - # part = MIMEPart(policy=PolicyHTTP) - # for hdr_key, hdr_val in res_headers.items(): - # if hdr_val: - # part.add_header(hdr_key, hdr_val) - # if res_data: - # part.set_payload(res_data) - ###multi.attach(part) - ##return multi + c_type = 
res_headers.get("Content-Type") + c_loc = res_headers.get("Content-Location") + c_fn = os.path.basename(c_loc) if c_loc else None + yield out_id, (c_fn, res_data, c_type, res_headers) results_parts = [(_res_id, _res_id, _res_val) for _res_id, _res_val in results.items()] results_parts = list(add_result_parts(results_parts)) - #res_multi = AnyMultipartEncoder(results_parts) - res_multi = MultipartEncoder(results_parts) - ##resp_ctype = f"{res_multi.get_content_type()}; boundary=\"{res_multi.get_boundary()}\"" + res_multi = MultipartEncoder(results_parts, content_type=ContentType.MULTIPART_MIXED) resp_headers = headers or {} - ##resp_headers.update({"Content-Type": resp_ctype}) resp_headers.update({"Content-Type": res_multi.content_type}) resp = HTTPOk(detail=f"Multipart Response for {job}", headers=resp_headers) - # drop generator contents that includes its own headers in the body, only keep nested parts - ###resp.body = res_multi.as_bytes(policy=PolicyHTTP).split(res_multi.policy.linesep.encode(), 3)[-1] resp.body = res_multi.read() return resp diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 8aa202222..fd1e09a6f 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -4174,8 +4174,7 @@ class Execute(ExecuteInputOutputs): validator=OneOf(ExecuteMode.values()) ) response = JobResponseOptionsEnum( - missing=drop, - default=ExecuteResponse.DOCUMENT, + missing=drop, # no default to ensure 'Prefer' header vs 'response' body resolution order can be performed description=( "Indicates the desired representation format of the response. " f"(see for more details: {DOC_URL}/processes.html#execution-body)." 
@@ -5831,22 +5830,46 @@ class JobOutputReference(ExtendedMappingSchema): format = FormatSelection(missing=drop) -class JobOutputValue(OneOfKeywordSchema): +class JobOutputArrayReference(ExtendedSequenceSchema): + item = JobOutputReference() + + +class JobOutputQualifiedValueLiteral(Format): + value = AnyLiteralType() + mediaType = MediaType(missing=drop, example=ContentType.APP_JSON) # override for optional, others already optional + format = FormatSelection(missing=drop) + + +class JobOutputQualifiedDataLiteral(Format): + data = AnyLiteralType() + mediaType = MediaType(missing=drop, example=ContentType.APP_JSON) # override for optional, others already optional + format = FormatSelection(missing=drop) + + +class JobOutputLiteral(OneOfKeywordSchema): _one_of = [ - JobOutputReference(tilte="JobOutputReference"), - AnyLiteralDataType(title="JobOutputLiteral") + # AnyLiteralType(), # NOTE: purposely omit value inline, always embed in 'value' or 'data' for job outputs + JobOutputQualifiedDataLiteral(), + JobOutputQualifiedValueLiteral(), ] -class JobOutput(AllOfKeywordSchema): - _all_of = [ - OutputIdentifierType(), - JobOutputValue(), +class JobOutputArrayLiteral(ExtendedSequenceSchema): + item = JobOutputLiteral() + + +class JobOutputValueObject(OneOfKeywordSchema): + _one_of = [ + JobOutputReference(), + JobOutputLiteral(), + # array possible since nested object under 'id' + JobOutputArrayReference(), + JobOutputArrayLiteral(), ] class JobOutputMap(ExtendedMappingSchema): - output_id = JobOutputValue( + output_id = JobOutputValueObject( variable="{output-id}", title="JobOutputData", description=( "Output data as literal value or file reference. 
" @@ -5855,12 +5878,28 @@ class JobOutputMap(ExtendedMappingSchema): ) +class JobOutputFields(OneOfKeywordSchema): + _one_of = [ + JobOutputReference(), + JobOutputQualifiedDataLiteral(), + JobOutputQualifiedValueLiteral(), + ] + + +class JobOutputItem(AllOfKeywordSchema): + _all_of = [ + OutputIdentifierType(), + JobOutputFields(), # cannot be an array directly, since 'id' field needed in this representation + ] + + class JobOutputList(ExtendedSequenceSchema): title = "JobOutputList" - output = JobOutput(description="Job output result with specific keyword according to represented format.") + output = JobOutputItem(description="Job output result with specific keyword according to represented format.") class JobOutputs(OneOfKeywordSchema): + description = "Job outputs with many alternate representations according to the specified 'schema' query parameter." _one_of = [ JobOutputMap(), JobOutputList(), From d050cd3c942a4799c8f1da7a03b1f7186bfa345b Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 28 Sep 2024 03:08:34 -0400 Subject: [PATCH 26/75] =?UTF-8?q?[wip]=C2=A0multipart=20result=20tests=20-?= =?UTF-8?q?=20add=20content-disposition=20and=20content-location=20headers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../EchoResultsTester/package.cwl | 3 ++ tests/functional/test_wps_package.py | 37 +++++++++++++------ tests/functional/utils.py | 3 +- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/functional/application-packages/EchoResultsTester/package.cwl b/tests/functional/application-packages/EchoResultsTester/package.cwl index 08d6ace3c..37018f54f 100644 --- a/tests/functional/application-packages/EchoResultsTester/package.cwl +++ b/tests/functional/application-packages/EchoResultsTester/package.cwl @@ -22,15 +22,18 @@ outputs: output_data: type: string outputBinding: + # note: since no file is associated for literal type, a link representation from it should use 
'output_data.txt' outputEval: $(inputs.message) output_text: type: File outputBinding: + # note: purposely use a different name than 'output_text' to validate the resulting path uses this one glob: result.txt format: "iana:text/plain" output_json: type: File outputBinding: + # note: purposely use a different name than 'output_json' to validate the resulting path uses this one glob: result.json format: "iana:application/json" $namespaces: diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index d047cec16..7b791cedb 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3983,6 +3983,7 @@ def test_execute_single_output_multipart_accept_data(self): results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: {output_json} @@ -4199,23 +4200,28 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" - # request status instead of results since not expecting 'document' JSON in this case - status_url = resp.json["location"] - status = self.monitor_job(status_url, return_status=True) - assert status["status"] == Status.SUCCEEDED - - job_id = status["jobID"] + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} + Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 test --{boundary} + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} Content-ID: Content-Length: 0 @@ -4356,14 +4362,14 @@ def test_execute_multi_output_prefer_header_return_representation(self): output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} - Content-Disposition: attachment; filename="output_data.txt" name="output_data" + Content-Disposition: attachment; filename="output_data.txt"; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 test --{boundary} - Content-Disposition: attachment; filename="result.json" name="output_json" + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: @@ -4425,13 +4431,16 @@ def test_execute_multi_output_response_raw_value(self): output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} + Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 test --{boundary} + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 16 @@ -4489,15 +4498,17 @@ def 
test_execute_multi_output_response_raw_reference(self): boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} + Content-Disposition: attachment; name="output_data" filename="output_data.txt" Content-Type: {ContentType.TEXT_PLAIN} + Content-Location: {out_url}/{job_id}/output_data/output_data.txt Content-ID: Content-Length: 0 - Content-Location: {out_url}/{job_id}/output_data/result.txt --{boundary} + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 0 - Content-Location: {out_url}/{job_id}/output_json/result.json --{boundary}-- """).replace("\n", "\r\n") results_text = self.remove_result_multipart_variable(results.text) @@ -4554,18 +4565,22 @@ def test_execute_multi_output_response_raw_mixed(self): output_json = json.dumps({"data": "test"}, separators=(",", ":")) results_body = inspect.cleandoc(f""" --{boundary} + Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 test --{boundary} + Content-Disposition: attachment; name="output_text"; filename="result.txt" Content-Type: {ContentType.TEXT_PLAIN} + Content-Location: {out_url}/{job_id}/output_text/result.txt Content-ID: Content-Length: 0 - Content-Location: {out_url}/{job_id}/output_text/result.txt --{boundary} + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 16 diff --git a/tests/functional/utils.py b/tests/functional/utils.py index dbdfe31bb..21712a836 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -443,7 +443,8 @@ def _try_get_logs(self, status_url): def fully_qualified_test_process_name(self, name=""): extra_name = f"-{name}" if name else "" 
class_name = fully_qualified_name(self) - test_name = f"{class_name}.{self._testMethodName}{extra_name}".replace(".", "-") + test_name = f"{class_name}.{self._testMethodName}{extra_name}" + test_name = test_name.replace(".", "-").replace("-_", "_").replace("_-", "-") return test_name @overload From 26d399d6bf754a4729af5630c1f62fe268e1b6df Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 1 Oct 2024 15:55:07 -0400 Subject: [PATCH 27/75] [wip] job document response with inline conversion of data/link outputs --- tests/functional/test_wps_package.py | 52 +++++---- tests/test_formats.py | 31 +++--- tests/wps_restapi/test_jobs.py | 47 ++++++++ weaver/formats.py | 20 +++- weaver/utils.py | 14 +++ weaver/wps_restapi/jobs/utils.py | 129 ++++++++++++++++------ weaver/wps_restapi/swagger_definitions.py | 3 +- 7 files changed, 221 insertions(+), 75 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 7b791cedb..67b094b66 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3643,7 +3643,7 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert results.text == output_json outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -3822,7 +3822,7 @@ def test_execute_single_output_response_raw_value_complex(self): assert results.content_type.startswith(ContentType.APP_JSON) assert results.json == {"data": "test"} outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) - output_json = json.dumps({"data": 
"test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { @@ -3979,7 +3979,7 @@ def test_execute_single_output_multipart_accept_data(self): results = resp assert ContentType.MULTIPART_MIXED in results.content_type boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) results_body = inspect.cleandoc(f""" --{boundary} Content-Type: {ContentType.APP_JSON} @@ -4130,7 +4130,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): Content-Type: {ContentType.APP_YAML} Content-ID: Content-Length: 12 - + {output_json_as_yaml} --{boundary}-- """).replace("\n", "\r\n") @@ -4149,7 +4149,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): # validate the results can be obtained with the "real" representation result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" assert result_json.content_type == ContentType.APP_JSON assert result_json.text == output_json @@ -4223,9 +4223,10 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): --{boundary} Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 0 - Content-Location: {out_url}/{job_id}/output_json/result.json + --{boundary}-- """).replace("\n", "\r\n") results_text = 
self.remove_result_multipart_variable(results.text) @@ -4234,7 +4235,9 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4359,14 +4362,14 @@ def test_execute_multi_output_prefer_header_return_representation(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) results_body = inspect.cleandoc(f""" --{boundary} Content-Disposition: attachment; filename="output_data.txt"; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 - + test --{boundary} Content-Disposition: attachment; name="output_json"; filename="result.json" @@ -4374,7 +4377,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 16 - + {output_json} --{boundary}-- """).replace("\n", "\r\n") @@ -4384,7 +4387,9 @@ def test_execute_multi_output_prefer_header_return_representation(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4428,7 +4433,7 @@ def 
test_execute_multi_output_response_raw_value(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) results_body = inspect.cleandoc(f""" --{boundary} Content-Disposition: attachment; name="output_data" @@ -4452,7 +4457,9 @@ def test_execute_multi_output_response_raw_value(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4503,12 +4510,14 @@ def test_execute_multi_output_response_raw_reference(self): Content-Location: {out_url}/{job_id}/output_data/output_data.txt Content-ID: Content-Length: 0 + --{boundary} Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 0 + --{boundary}-- """).replace("\n", "\r\n") results_text = self.remove_result_multipart_variable(results.text) @@ -4517,7 +4526,9 @@ def test_execute_multi_output_response_raw_reference(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -4562,14 +4573,14 @@ def test_execute_multi_output_response_raw_mixed(self): out_url = 
get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) results_body = inspect.cleandoc(f""" --{boundary} Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 - + test --{boundary} Content-Disposition: attachment; name="output_text"; filename="result.txt" @@ -4577,13 +4588,14 @@ def test_execute_multi_output_response_raw_mixed(self): Content-Location: {out_url}/{job_id}/output_text/result.txt Content-ID: Content-Length: 0 + --{boundary} Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 16 - + {output_json} --{boundary}-- """).replace("\n", "\r\n") @@ -4713,7 +4725,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_json = self.remove_result_format(results.json) - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert results_json == { "output_data": { @@ -4849,11 +4861,11 @@ def test_execute_multi_output_response_document_mixed(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_json = self.remove_result_format(results.json) - output_json = json.dumps({"data": "test"}, separators=(",", ":")) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert 
results_json == { "output_data": { - "href": f"{out_url}/{job_id}/output_text/result.txt", + "href": f"{out_url}/{job_id}/output_data/output_data.txt", "type": ContentType.TEXT_PLAIN, }, "output_json": { diff --git a/tests/test_formats.py b/tests/test_formats.py index 874716f35..7c009e451 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -628,23 +628,26 @@ def test_repr_json_default_string(): @pytest.mark.parametrize( - ["test", "expect", "force_string"], + ["test", "expect", "force_string", "extra_params"], [ - ("abc", "abc", True), - (123, 123, False), - (123, "123", True), - ([1, 2], [1, 2], False), - ([1, 2], "[1, 2]", True), - ("[1, 2]", "[1, 2]", True), - ({"a": 1}, {"a": 1}, False), - ({"a": 1}, "{\"a\": 1}", True), - ("{\"a\": 1}", "{\"a\": 1}", True), - (null, str(null), False), - (null, str(null), True), + ("abc", "abc", True, {}), + (123, 123, False, {}), + (123, "123", True, {}), + ([1, 2], [1, 2], False, {}), + ([1, 2], "[1, 2]", True, {}), + ("[1, 2]", "[1, 2]", True, {}), + ({"a": 1}, {"a": 1}, False, {}), + ({"a": 1}, "{\"a\": 1}", True, {}), + ({"a": [1, 2]}, "{\"a\":[1,2]}", True, {"separators": (",", ":")}), + ("{\"a\": 1}", "{\"a\": 1}", True, {}), + ("\t\r\n{\"a\": 1}\r\n", "{\"a\": 1}", True, {}), + ("\t\r\n{\"a\": [1, 2]}\r\n", "{\"a\":[1,2]}", True, {"separators": (",", ":")}), + (null, str(null), False, {}), + (null, str(null), True, {}), ] ) -def test_repr_json_force_string_handling(test, expect, force_string): - result = f.repr_json(test, force_string=force_string, indent=None) +def test_repr_json_force_string_handling(test, expect, force_string, extra_params): + result = f.repr_json(test, force_string=force_string, indent=None, **extra_params) assert result == expect diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index b83151fb9..ea89d50d6 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -46,6 +46,7 @@ DATETIME_INTERVAL_OPEN_END_SYMBOL, 
DATETIME_INTERVAL_OPEN_START_SYMBOL ) +from weaver.wps_restapi.jobs.utils import get_job_results_document if TYPE_CHECKING: from typing import Iterable, List, Optional, Tuple, Union @@ -1732,3 +1733,49 @@ def test_job_statistics_response(self): finally: if job: self.job_store.delete_job(job.id) + + +@pytest.mark.parametrize( + ["results", "expected"], + [ + # cases not handled by the function, expect qualified value representation as input + # ({"test": 1}, {"test": 1}), + # ({"test": [1, 2, 3]}, {"test": [1, 2, 3]}), + ( + {"test": {"value": 1}}, + {"test": 1}, + ), + ( + {"test": {"value": [1, 2, 3]}}, + {"test": [1, 2, 3]}, + ), + ( + {"test": [1, {"value": 2}, {"value": 3, "mediaType": ContentType.TEXT_PLAIN}]}, + {"test": [1, 2, 3]}, + ), + ( + {"test": [1, {"value": 2, "mediaType": "text/special"}, {"value": 3}]}, + {"test": [ + {"value": "1", "mediaType": ContentType.TEXT_PLAIN}, + {"value": "2", "mediaType": "text/special"}, + {"value": "3", "mediaType": ContentType.TEXT_PLAIN} + ]}, + ), + ( + {"test": [ + {"value": 1, "mediaType": ContentType.APP_JSON}, + {"value": 2, "mediaType": "text/special"}, + {"value": 3, "mediaType": ContentType.APP_YAML} + ]}, + {"test": [ + {"value": "1", "mediaType": ContentType.APP_JSON}, + {"value": "2", "mediaType": "text/special"}, + {"value": "3", "mediaType": ContentType.APP_YAML} + ]}, + ), + ] +) +def test_get_job_results_document(results, expected): + job = Job(task_id="test", outputs={}) + output = get_job_results_document(job, results, container={}) + assert output == expected diff --git a/weaver/formats.py b/weaver/formats.py index aae073b73..133512880 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -1159,8 +1159,8 @@ def json_default_handler(obj): raise TypeError(f"Type {type(obj)} not serializable.") -def repr_json(data, force_string=True, ensure_ascii=False, indent=2, **kwargs): - # type: (Any, bool, bool, Optional[int], **Any) -> Union[JSON, str, None] +def repr_json(data, force_string=True, 
ensure_ascii=False, indent=2, separators=None, **kwargs): + # type: (Any, bool, bool, Optional[int], Optional[Tuple[str, str]], **Any) -> Union[JSON, str, None] """ Ensure that the input data can be serialized as JSON to return it formatted representation as such. @@ -1173,8 +1173,18 @@ def repr_json(data, force_string=True, ensure_ascii=False, indent=2, **kwargs): default = json_default_handler try: if isinstance(data, str): - return data # avoid adding additional quotes - data_str = json.dumps(data, indent=indent, ensure_ascii=ensure_ascii, default=default, **kwargs) - return data_str if force_string else data + try: + data = json.loads(data) + except ValueError: + return data.strip() # avoid adding additional quotes + data_str = json.dumps( + data, + indent=indent, + ensure_ascii=ensure_ascii, + separators=separators, + default=default, + **kwargs, + ) + return data_str.strip() if force_string else data except Exception: # noqa: W0703 # nosec: B110 return str(data) diff --git a/weaver/utils.py b/weaver/utils.py index 1764a42f0..2c8b0067c 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -1480,6 +1480,20 @@ def bytes2str(string): return string.decode("UTF-8") +def data2str(data): + # type: (Union[AnyValueType, io.IOBase]) -> str + """ + Converts literal data to a plain string representation. 
+ """ + if hasattr(data, "seek"): + data.seek(0) + if hasattr(data, "read"): + data = data.read() + if not isinstance(data, (str, bytes)): + data = str(data) + return bytes2str(data) + + def islambda(func): # type: (Any) -> bool return isinstance(func, type(lambda: None)) and func.__name__ == (lambda: None).__name__ diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index fd39a8ece..c4759ffda 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -42,6 +42,7 @@ from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status from weaver.store.base import StoreJobs, StoreProcesses, StoreServices from weaver.utils import ( + data2str, get_any_id, get_any_value, get_header, @@ -598,13 +599,10 @@ def get_job_results_response( }) ) - # simplify data literals if qualified value representation is not needed # use deserialized contents such that only the applicable fields remain - results_json = get_job_results_simplified(results_json) - - # note: - # Cannot add "links" field in response body because variable Output ID keys are directly at the root - # Possible conflict with an output that would be named "links". + # (simplify compares, this is assumed by the following call) + results_json = get_job_results_document(job, results_json, container=container) + headers.extend(refs) return HTTPOk(json=results_json, headers=headers) if not results: # avoid schema validation error if all by reference @@ -622,6 +620,7 @@ def get_job_results_response( out_type = get_any_value(out_info, key=True) out_data = get_any_value(out_info) + # multipart response if ( len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1) or @@ -649,28 +648,6 @@ def get_job_results_response( return resp -def get_job_results_simplified(results): - # type: (ExecutionResults) -> ExecutionResults - """ - Removes nested literal value definitions if qualified value representation is not needed. 
- - Qualified value representation is not needed if no other field than ``value`` is provided with the literal data. - The simplification is applied for both literals on their own and nested array of literals. - """ - out_results = {} - for res_id, res_val in results.items(): - if isinstance(res_val, dict) and list(res_val) == ["value"]: - out_results[res_id] = res_val["value"] - elif isinstance(res_val, list): - out_results[res_id] = [ - item["value"] if isinstance(item, dict) and list(item) == ["value"] else item - for item in res_val - ] - else: - out_results[res_id] = res_val - return out_results - - def generate_or_resolve_result( job, # type: Job result, # type: ExecutionResultObject @@ -719,8 +696,8 @@ def generate_or_resolve_result( if key == "value": res_data = io.StringIO() - c_length = res_data.write(val) - typ = ContentType.TEXT_PLAIN + c_length = res_data.write(data2str(val)) + typ = result.get("mediaType") or ContentType.TEXT_PLAIN if key == "value" and output_mode == ExecuteTransmissionMode.REFERENCE: if not os.path.isfile(loc): @@ -752,16 +729,98 @@ def generate_or_resolve_result( return res_headers, res_data -def get_job_results_multipart(job, results, headers, container): - # type: (Job, ExecutionResults, AnyHeadersContainer, AnySettingsContainer) -> HTTPOk +def get_job_results_document(job, results, *, container): + # type: (Job, ExecutionResults, Any, AnySettingsContainer) -> ExecutionResults + """ + Generates the :term:`Job` results document response from available or requested outputs with necessary conversions. + + Removes nested literal value definitions if qualified value representation is not needed. + Qualified value representation is not needed if no other field than ``value`` is provided with the literal data, + or when the specified :term:`Media-Type` is simply the plain text default for data literals. + The simplification is applied for both literals on their own and nested array of literals. 
+ However, when processing an array, the qualified value representation is preserved if any of the items requires + the explicit mention of another :term:`Media-Type` than plain text, to return a consistent structure. + + Uses the :paramref:`job` definition to resolve the transmission mode of each output. + + .. warning:: + This function assumes that schema deserialization was applied beforehand. + Therefore, it will not attempt matching every possible combination of the results representation. + """ + settings = get_settings(container) + + def make_result(result, result_id, output_id): + # type: (ExecutionResultValue, str, str) -> Union[AnyValueType, ExecutionResultObject] + if isinstance(result, dict): + key = get_any_value(result, key=True) + val = get_any_value(result) + else: + key = "value" + val = result + result = {"value": val} + mode = get_job_output_transmission(job, result_id, is_reference=(key == "href")) + headers, data = generate_or_resolve_result(job, result, result_id, output_id, mode, settings) + if data is None: + ref = { + "href": headers["Content-Location"], + "type": headers["Content-Type"], + } + return ref + + c_type = headers.get("Content-Type") or "" + c_enc = headers.get("Content-Encoding") + if not c_type or ( + # note: + # Explicit content-type check to consider that any additional parameter provided + # with text/plain must be reported. Only "purely" text/plain can be removed. 
+ c_type == ContentType.TEXT_PLAIN and not c_enc + ): + value = val # use original to avoid string conversion + else: + value = { + "value": data2str(data), + "mediaType": c_type, + } + if c_enc: + value["encoding"] = c_enc + return value + + out_results = {} + for res_id, res_val in results.items(): + if isinstance(res_val, list): + res_data = [] + for out_idx, item in enumerate(res_val): + out_id = f"{res_id}.{out_idx}" + out_res = make_result(item, res_id, out_id) + res_data.append(out_res) + + # backtrack if not all literals (all qualified or none qualified, but no mix) + is_qualified = [isinstance(item, dict) for item in res_data] + if not all(is_qualified) and len([item for item in is_qualified if item]): + res_data = [ + item + if isinstance(item, dict) + else {"value": data2str(item), "mediaType": ContentType.TEXT_PLAIN} + for item in res_data + ] + + else: + res_data = make_result(res_val, res_id, res_id) + + out_results[res_id] = res_data + return out_results + + +def get_job_results_multipart(job, results, *, headers, container): + # type: (Job, ExecutionResults, Any, AnyHeadersContainer, AnySettingsContainer) -> HTTPOk """ - Generates the :term:`Job` results multipart response from available or requested outputs. + Generates the :term:`Job` results multipart response from available or requested outputs with necessary conversions. .. seealso:: - Function :func:`get_results` should be used to avoid re-processing all output format combinations. - Details of ``multipart`` (:rfc:`2046#section-5.1`) :term:`Media-Type` family. - :param job: + :param job: Job definition with potential metadata about requested outputs. :param results: Pre-filtered and pre-processed results in a normalized format structure. :param headers: Additional headers to include in the response. :param container: Application settings to resolve locations. 
@@ -772,7 +831,7 @@ def add_result_parts(result_parts): # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MultiPartFieldsType for res_id, out_id, result in result_parts: if isinstance(result, list): - sub_parts = [(f"{out_id}.{i}", out_id, data) for i, data in enumerate(result)] + sub_parts = [(out_id, f"{out_id}.{out_idx}", data) for out_idx, data in enumerate(result)] sub_parts = add_result_parts(sub_parts) sub_multi = MultipartEncoder(sub_parts, content_type=ContentType.MULTIPART_MIXED) sub_out_url = job.result_path(output_id=out_id) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index fd1e09a6f..aae08425f 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -2126,7 +2126,8 @@ class TransmissionModeEnum(ExtendedSchemaNode): _schema = f"{OGC_API_PROC_PART1_SCHEMAS}/transmissionMode.yaml" schema_type = String title = "TransmissionMode" - default = ExecuteTransmissionMode.VALUE + # no default to allow auto-resolution as data/link if omitted + # default = ExecuteTransmissionMode.VALUE example = ExecuteTransmissionMode.VALUE validator = OneOf(ExecuteTransmissionMode.values()) From aadc2ed16aae35c305391a8109078307ed34a4c9 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 1 Oct 2024 22:35:36 -0400 Subject: [PATCH 28/75] job response for links-only with raw representation --- tests/functional/test_wps_package.py | 38 +++++++++++++++++++++++----- weaver/wps_restapi/jobs/utils.py | 16 +++++++----- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 67b094b66..35b0f1438 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3730,10 +3730,18 @@ def test_execute_single_output_prefer_header_return_minimal_complex(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = 
self.app.get(f"/jobs/{job_id}/results") + results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" + output_json_href = f"{out_url}/{job_id}/output_json/result.json" + output_json_link = f"<{output_json_href}>; rel=\"output_json\"; type=\"{ContentType.APP_JSON}\"" assert results.status_code == 204, "No contents expected for minimal reference result." assert results.body == b"" - assert results.content_type.startswith(ContentType.APP_JSON) - assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/result.json" + assert results.content_type is None + assert results.headers["Content-Location"] == results_href + assert ("Link", output_json_link) in results.headerlist + assert not any( + any(out_id in link[-1] for out_id in ["output_datta", "output_text"]) + for link in results.headerlist if link[0] == "Link" + ), "Filtered outputs should not be found in results response links." outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json == { @@ -3866,10 +3874,18 @@ def test_execute_single_output_response_raw_reference_literal(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - assert results.status_code == 204, "No contents expected for single reference result." + results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" + output_json_href = f"{out_url}/{job_id}/output_json/result.json" + output_json_link = f"<{output_json_href}>; rel=\"output_json\"; type=\"{ContentType.APP_JSON}\"" + assert results.status_code == 204, "No contents expected for minimal reference result." 
assert results.body == b"" - assert results.content_type.startswith(ContentType.TEXT_PLAIN) - assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_data/result.txt" + assert results.content_type is None + assert results.headers["Content-Location"] == results_href + assert ("Link", output_json_link) in results.headerlist + assert not any( + any(out_id in link[-1] for out_id in ["output_datta", "output_text"]) + for link in results.headerlist if link[0] == "Link" + ), "Filtered outputs should not be found in results response links." outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -3911,10 +3927,18 @@ def test_execute_single_output_response_raw_reference_complex(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") + results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" + output_json_href = f"{out_url}/{job_id}/output_json/result.json" + output_json_link = f"<{output_json_href}>; rel=\"output_json\"; type=\"{ContentType.APP_JSON}\"" assert results.status_code == 204, "No contents expected for single reference result." assert results.body == b"" - assert results.content_type.startswith(ContentType.APP_JSON) - assert results.headers["Content-Location"] == f"{out_url}/{job_id}/output_json/result.json" + assert results.content_type is None + assert results.headers["Content-Location"] == results_href + assert ("Link", output_json_link) in results.headerlist + assert not any( + any(out_id in link[-1] for out_id in ["output_datta", "output_text"]) + for link in results.headerlist if link[0] == "Link" + ), "Filtered outputs should not be found in results response links." 
outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index c4759ffda..6ab912e02 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -368,7 +368,6 @@ def get_results( # pylint: disable=R1260 ogc_api = schema == JobInputsOutputsSchema.OGC outputs = {} if ogc_api else [] fmt_key = "mediaType" if ogc_api else "mimeType" - out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC) if link_references else {} references = {} for result in job.results: # Filter outputs not requested, unless 'all' requested by omitting @@ -403,7 +402,7 @@ def get_results( # pylint: disable=R1260 rtype = "href" if get_any_value(val_item, key=True, file=True, data=False) else "data" val_data = get_any_value(val_item, file=True, data=False) out_key = rtype - out_mode = out_ref.get(out_id, {}).get("transmissionMode") + out_mode = get_job_output_transmission(job, out_id, is_reference=(out_key == "href")) as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE if rtype == "href" and isinstance(val_data, str): # fix paths relative to instance endpoint, @@ -499,6 +498,7 @@ def get_job_output_transmission(job, output_id, is_reference): Obtain the requested :term:`Job` output ``transmissionMode``. 
""" outputs = job.outputs or {} + outputs = convert_output_params_schema(outputs, JobInputsOutputsSchema.OGC) out = outputs.get(output_id) or {} mode = out.get("transmissionMode") # because mode can be omitted, resolve their default explicitly @@ -562,9 +562,15 @@ def get_job_results_response( # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document is_raw = get_job_return(job, results_contents, results_headers) == ExecuteResponse.RAW + # when multipart is needed (either requested explicitly or inferred), do not use references at this point + # this is to make multipart content generation simply by grouping everything under a single 'results' container + is_accept_multipart = ( + isinstance(job.accept_type, str) and + any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART) + ) results, refs = get_results(job, container, value_key="value", schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details - link_references=is_raw) + link_references=is_raw and not is_accept_multipart) headers = ResponseHeaders(headers or {}) headers.pop("Location", None) @@ -573,10 +579,6 @@ def get_job_results_response( link_header = make_link_header(link) headers.add("Link", link_header) - is_accept_multipart = ( - isinstance(job.accept_type, str) and - any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART) - ) if not is_raw and not is_accept_multipart: try: results_schema = sd.ResultsDocument() From bf2b5398ef91b6b153005a86cc6232a2b38a3f18 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 1 Oct 2024 23:48:04 -0400 Subject: [PATCH 29/75] [wip] update tests and docs for expected handling of multi-output all by-ref --- docs/source/processes.rst | 30 +++++--- tests/functional/test_wps_package.py | 102 ++++++++++++++++++++++++--- 2 files changed, 115 insertions(+), 17 deletions(-) diff --git 
a/docs/source/processes.rst b/docs/source/processes.rst index 9602330c8..444227a23 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -874,8 +874,10 @@ Following is a detailed listing of the expected response structure according to | | | | | - using embedded content parts with data | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``reference`` | >1 | - :ref:`Multipart ` | - | [#resPreferReturn]_ | | (for *all*) | | content [#resCTypeMulti]_ | - | | | | | - using embedded content parts with data | + | [#resPreferReturn]_ | | (for *all*) | | content with embedded part links if requested | + | | | | | by ``Accept`` header [#resCTypeMulti]_ | + | | | | | - otherwise, similar to |res-ref|, but with | + | | | | | a ``Link`` header for each requested output | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |none| | ``document`` | |none| | |any| | - :ref:`Results ` | | | | | | content | @@ -987,10 +989,14 @@ Following is a detailed listing of the expected response structure according to representation using other encoding (e.g.: :term:`XML` or :term:`YAML`) could be returned if requested by the ``Accept`` header. - For every other case where a return ``representation`` or ``raw`` results are explicitly requested, + For cases where a return ``representation`` or ``raw`` response results are explicitly requested, + and that no ``Accept`` header explicitly requests an alternative representation, the :ref:`Multipart Results ` structure - using ``multipart`` contents (:rfc:`2046#section-5.1`) is employed by default. - The representation of each part (as literal data or link reference [#resValRef]_) + using ``multipart`` contents (:rfc:`2046#section-5.1`) is employed by default, unless *all* requested + outputs resolve to a :ref:`File Reference `. 
In such case, the references will be contained + in ``Link`` headers, similar to the |res-ref|_ response, but with multiple links for all requested outputs. + + When resolved as ``multipart``, the representation of each part (as literal data or link reference [#resValRef]_) is established by the ``transmissionMode`` parameter combinations, or as applicable according to the ``Accept`` and the ``Prefer: return`` headers. Alternatively to requesting ``representation`` or ``raw`` results, the :ref:`Multipart Results ` structure *could* also be requested explicitly @@ -2154,10 +2160,16 @@ a combination of ``Content-ID``, ``Content-Type`` and ``Content-Location`` will To respect :rfc:`2392` definitions, ``Content-ID`` will use pattern ``<{outputID}@{jobID}>`` as unique identifier, and ``<{outputID}.{index}@{jobID}>`` in the case of an array of :ref:`File References `. -When the number of *requested* ``outputs`` [#outN]_ is more than one, the response will -either be ``multipart`` contents (:rfc:`2046#section-5.1`) or similar to -the :ref:`Document Result ` contents, -accordingly to the negotiated ``Accept`` content header. An example of a ``multipart`` representation is shown below. +When the number of *requested* ``outputs`` [#outN]_ is more than one, the obtained response will depend +on the negotiated ``Accept`` content header and the data/link resolution of each output. + +1. If all outputs are :ref:`File References ` and no ``Accept`` header was specified, a no-content + response with a ``Link`` for each output similarly to the above :ref:`job-results-raw-single-ref` is returned. +2. If a ``response=document`` or ``Prefer: return=minimal`` resolution is requested, outputs are + embedded in the :ref:`Document Result ` contents. +3. If either ``multipart`` contents (:rfc:`2046#section-5.1`) are explicitly requested by ``Accept`` header, or that + the above cases were not encountered, a multipart content response as shown below is returned [#resCTypeMulti]_. 
+ The resolution of the nested outputs within each boundary, either by value or reference, will resolve for each respective output according to the same rule conditions specified above for single output. diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 35b0f1438..76ed0e53e 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3739,7 +3739,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex(self): assert results.headers["Content-Location"] == results_href assert ("Link", output_json_link) in results.headerlist assert not any( - any(out_id in link[-1] for out_id in ["output_datta", "output_text"]) + any(out_id in link[-1] for out_id in ["output_data", "output_text"]) for link in results.headerlist if link[0] == "Link" ), "Filtered outputs should not be found in results response links." outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -3875,15 +3875,15 @@ def test_execute_single_output_response_raw_reference_literal(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" - output_json_href = f"{out_url}/{job_id}/output_json/result.json" - output_json_link = f"<{output_json_href}>; rel=\"output_json\"; type=\"{ContentType.APP_JSON}\"" + output_data_href = f"{out_url}/{job_id}/output_data/output_data.txt" + output_data_link = f"<{output_data_href}>; rel=\"output_data\"; type=\"{ContentType.TEXT_PLAIN}\"" assert results.status_code == 204, "No contents expected for minimal reference result." 
assert results.body == b"" assert results.content_type is None assert results.headers["Content-Location"] == results_href - assert ("Link", output_json_link) in results.headerlist + assert ("Link", output_data_link) in results.headerlist assert not any( - any(out_id in link[-1] for out_id in ["output_datta", "output_text"]) + any(out_id in link[-1] for out_id in ["output_json", "output_text"]) for link in results.headerlist if link[0] == "Link" ), "Filtered outputs should not be found in results response links." outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -3936,7 +3936,7 @@ def test_execute_single_output_response_raw_reference_complex(self): assert results.headers["Content-Location"] == results_href assert ("Link", output_json_link) in results.headerlist assert not any( - any(out_id in link[-1] for out_id in ["output_datta", "output_text"]) + any(out_id in link[-1] for out_id in ["output_data", "output_text"]) for link in results.headerlist if link[0] == "Link" ), "Filtered outputs should not be found in results response links." outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4490,7 +4490,15 @@ def test_execute_multi_output_response_raw_value(self): }, } - def test_execute_multi_output_response_raw_reference(self): + def test_execute_multi_output_response_raw_reference_default_links(self): + """ + All outputs resolved as reference (explicitly or inferred) with raw representation should be all Link headers. + + The multipart representation of the corresponding request must ask for it explicitly. + + .. 
seealso:: + - :func:`test_execute_multi_output_response_raw_reference_accept_multipart` + """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) @@ -4525,11 +4533,89 @@ def test_execute_multi_output_response_raw_reference(self): job_id = status["jobID"] out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" + output_data_href = f"{out_url}/{job_id}/output_data/output_data.txt" + output_data_link = f"<{output_data_href}>; rel=\"output_data\"; type=\"{ContentType.TEXT_PLAIN}\"" + output_json_href = f"{out_url}/{job_id}/output_json/result.json" + output_json_link = f"<{output_json_href}>; rel=\"output_json\"; type=\"{ContentType.APP_JSON}\"" + assert results.status_code == 204, "No contents expected for minimal reference result." + assert results.body == b"" + assert results.content_type is None + assert results.headers["Content-Location"] == results_href + assert ("Link", output_data_link) in results.headerlist + assert ("Link", output_json_link) in results.headerlist + assert not any( + any(out_id in link[-1] for out_id in ["output_text"]) + for link in results.headerlist if link[0] == "Link" + ), "Filtered outputs should not be found in results response links." + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": { + "value": "test" + }, + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } + + def test_execute_multi_output_response_raw_reference_accept_multipart(self): + """ + Requesting ``multipart`` explicitly should return it instead of default ``Link`` headers response. + + .. 
seealso:: + - :func:`test_execute_multi_output_response_raw_reference_default_links` + - :func:`test_execute_multi_output_multipart_accept_async_alt_acceptable` + - :func:`test_execute_multi_output_multipart_accept_async_not_acceptable` + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + # NOTE: + # No 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header + # without 'Accept' using multipart, it is expected that JSON document is used + # Also, use 'Prefer: wait' to avoid 'respond-async', since async always respond with the Job status. + exec_headers = { + "Accept": ContentType.MULTIPART_MIXED, + "Content-Type": ContentType.APP_JSON, + "Prefer": "wait=5", + } + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {}, # should use 'reference' by default + "output_data": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}, + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} - Content-Disposition: attachment; name="output_data" filename="output_data.txt" + Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-Location: {out_url}/{job_id}/output_data/output_data.txt Content-ID: From 65062951e89a845e4414ac053af087cab6b68ed4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 2 Oct 2024 00:07:05 -0400 Subject: [PATCH 30/75] update tests --- tests/functional/test_wps_package.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 76ed0e53e..e7e6f88c6 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -4079,10 +4079,12 @@ def test_execute_single_output_multipart_accept_link(self): boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} + Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: Content-Length: 0 - Content-Location: {out_url}/{job_id}/output_json/result.json + --{boundary}-- """).replace("\n", "\r\n") results_text = self.remove_result_multipart_variable(results.text) @@ -4615,7 +4617,7 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = inspect.cleandoc(f""" --{boundary} - Content-Disposition: attachment; name="output_data" + Content-Disposition: 
attachment; name="output_data"; filename="output_data.txt" Content-Type: {ContentType.TEXT_PLAIN} Content-Location: {out_url}/{job_id}/output_data/output_data.txt Content-ID: From 2abe8703c6b35877624083299b482a90c15d45ed Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 2 Oct 2024 00:07:51 -0400 Subject: [PATCH 31/75] [wip] FIXME note job response for no-content multi-link handling --- weaver/wps_restapi/jobs/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 6ab912e02..01a4be7ab 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -607,6 +607,14 @@ def get_job_results_response( headers.extend(refs) return HTTPOk(json=results_json, headers=headers) + if is_raw and not is_accept_multipart: + # FIXME: convert on-demand as per requested transmissionMode + # If "raw and not multipart" (ie: link_references=True), 'results' and 'refs' at this point would + # contain a mixture of the desired output transmissionMode and the available ones as per their + # original 'literal/complex' results, but they are not ALL converted to needed transmissionMode. + # Must convert before below empty-results check to return multi-link no-content response. 
+ pass + if not results: # avoid schema validation error if all by reference # Status code 204 for empty body # see: From 4ea918ae22e493e248868e1fbb369afb6e3754e8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 2 Oct 2024 15:54:00 -0400 Subject: [PATCH 32/75] [wip] job multi-results as links inline output transitionMode conversion --- weaver/wps_restapi/jobs/utils.py | 100 +++++++++++++++++++------------ 1 file changed, 63 insertions(+), 37 deletions(-) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 01a4be7ab..ba4c349c6 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -340,6 +340,7 @@ def get_results( # pylint: disable=R1260 value_key=None, # type: Optional[str] schema=JobInputsOutputsSchema.OLD, # type: Optional[JobInputsOutputsSchemaType] link_references=False, # type: bool + convert_output_transmission=False, # type: bool ): # type: (...) -> Tuple[ExecutionResults, HeadersTupleType] """ Obtains the job results with extended full WPS output URL as applicable and according to configuration settings. @@ -353,6 +354,11 @@ def get_results( # pylint: disable=R1260 Selects which schema to employ for representing the output results (listing or mapping). :param link_references: If enabled, an output that was requested by reference instead of by value will be returned as ``Link`` header. + :param convert_output_transmission: + If disabled (default), data/link representation preserves original results as per their literal/complex type. + If enabled, an output that was requested as reference will be converted as an :term:`URL`, whereas + an output requested by value will be converted to its literal contents, both as needed according to + their original results literal/complex type. :returns: Tuple with: - List or mapping of all outputs each with minimally an ID and value under the requested key. 
@@ -392,19 +398,38 @@ def get_results( # pylint: disable=R1260 ) ): array = [value] # array of array such that it iterated as the array of literals directly - # Any other type of array implies complex data (bbox or file) + # Any other type of array implies complex data (bbox, collection, file, etc.) # They must be defined on their own with respective media-type/format details per item. else: array = value if isinstance(value, list) else [value] - for val_item in array: + res_multi = len(array) > 1 + for val_idx, val_item in enumerate(array): val_data = val_item if isinstance(val_item, dict) and isinstance(value, list): rtype = "href" if get_any_value(val_item, key=True, file=True, data=False) else "data" val_data = get_any_value(val_item, file=True, data=False) + if not isinstance(val_item, dict): + # use the representation that contains all metadata if possible, otherwise rely on literal data only + val_item = result if isinstance(result, dict) else {rtype: val_data} + out_key = rtype - out_mode = get_job_output_transmission(job, out_id, is_reference=(out_key == "href")) + is_ref = rtype == "href" + out_mode = get_job_output_transmission(job, out_id, is_reference=is_ref) as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE - if rtype == "href" and isinstance(val_data, str): + res_id = f"{out_id}{val_idx}" if res_multi else out_id + + # on-demand convertion to requested transmission mode + if convert_output_transmission: + res_hdr, val_data = generate_or_resolve_result(job, val_item, res_id, out_id, out_mode, settings) + if val_data is not None and is_ref: # data generated from reference + is_ref = as_ref = False + out_key = value_key or "data" # OGC schema overrides after as needed + elif val_data is None and not is_ref: # reference generated from data + is_ref = as_ref = True + out_key = "href" + val_data = res_hdr["Content-Location"] + + if is_ref and isinstance(val_data, str): # fix paths relative to instance endpoint, # but leave explicit 
links as is (eg: S3 bucket, remote HTTP, etc.) if val_data.startswith("/"): @@ -419,18 +444,17 @@ def get_results( # pylint: disable=R1260 output = {out_key: val_data} # required for the rest to be there, other fields optional - if rtype == "href": - val_fmt = val_item if isinstance(val_item, dict) else result - if "mimeType" not in val_fmt: - val_fmt["mimeType"] = get_format(val_data, default=ContentType.TEXT_PLAIN).mime_type + if is_ref: + if "mimeType" not in val_item: + val_item["mimeType"] = get_format(val_data, default=ContentType.TEXT_PLAIN).mime_type if ogc_api or not strict: - output["type"] = val_fmt["mimeType"] + output["type"] = val_item["mimeType"] if not ogc_api or not strict or as_ref: - output["format"] = {fmt_key: val_fmt["mimeType"]} + output["format"] = {fmt_key: val_item["mimeType"]} for field in ["encoding", "schema"]: if field in result: - output["format"][field] = val_fmt[field] - elif rtype != "href": + output["format"][field] = val_item[field] + elif not is_ref: dtype = result.get("dataType", any2wps_literal_datatype(val_data, is_value=True) or "string") if ogc_api: output["dataType"] = {"name": dtype} @@ -558,19 +582,23 @@ def get_job_results_response( raise_job_bad_status(job, container) # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw' + # resolution of 'transmissionMode' for document representation will be done by its own handler function # See: # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document is_raw = get_job_return(job, results_contents, results_headers) == ExecuteResponse.RAW - # when multipart is needed (either requested explicitly or inferred), do not use references at this point - # this is to make multipart content generation simply by grouping everything under a single 'results' container + # when multipart is requested explicitly, do 
NOT use 'link_references' at this point + # this is to simplify multipart content generation by grouping everything under a single 'results' container is_accept_multipart = ( isinstance(job.accept_type, str) and any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART) ) - results, refs = get_results(job, container, value_key="value", - schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details - link_references=is_raw and not is_accept_multipart) + results, refs = get_results( + job, container, value_key="value", + schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details + link_references=is_raw and not is_accept_multipart, + convert_output_transmission=is_raw and not is_accept_multipart, + ) headers = ResponseHeaders(headers or {}) headers.pop("Location", None) @@ -607,14 +635,6 @@ def get_job_results_response( headers.extend(refs) return HTTPOk(json=results_json, headers=headers) - if is_raw and not is_accept_multipart: - # FIXME: convert on-demand as per requested transmissionMode - # If "raw and not multipart" (ie: link_references=True), 'results' and 'refs' at this point would - # contain a mixture of the desired output transmissionMode and the available ones as per their - # original 'literal/complex' results, but they are not ALL converted to needed transmissionMode. - # Must convert before below empty-results check to return multi-link no-content response. - pass - if not results: # avoid schema validation error if all by reference # Status code 204 for empty body # see: @@ -675,7 +695,10 @@ def generate_or_resolve_result( :param output_id: Generic identifier of the output containing the result. :param output_mode: Desired output transmission mode. :param settings: Application settings to resolve locations. - :return: Resolved locations. + :return: + Resolved headers and data (as applicable) for the result. + If only returned by reference, ``None`` data is returned. 
An empty-data contents would be an empty string. + Therefore, the explicit check of ``None`` is important to identify a by-reference result. """ key = get_any_value(result, key=True) val = get_any_value(result) @@ -685,6 +708,8 @@ def generate_or_resolve_result( typ = None res_data = None c_length = None + is_val = key in ["value", "data"] + is_ref = key in ["href", "reference"] # NOTE: # work with local files (since we have them), to avoid unnecessary loopback request @@ -692,10 +717,11 @@ def generate_or_resolve_result( # FIXME: Handle S3 output storage. Should multipart response even be allowed in this case? - if key == "href": + if is_ref: url = val typ = result.get("type") or ContentType.APP_OCTET_STREAM loc = map_wps_output_location(val, settings, exists=True, url=False) + # FIXME: fails if output path is the "relative" results '/{jobID}/...' if not url: out_dir = get_wps_output_dir(settings) @@ -704,18 +730,18 @@ def generate_or_resolve_result( loc = os.path.join(out_dir, job_path) url = map_wps_output_location(loc, settings, exists=False, url=True) - if key == "value": + if is_val: res_data = io.StringIO() c_length = res_data.write(data2str(val)) typ = result.get("mediaType") or ContentType.TEXT_PLAIN - if key == "value" and output_mode == ExecuteTransmissionMode.REFERENCE: + if is_val and output_mode == ExecuteTransmissionMode.REFERENCE: if not os.path.isfile(loc): os.makedirs(os.path.dirname(loc), exist_ok=True) with open(loc, mode="w", encoding="utf-8") as out_file: out_file.write(val) - if key == "href" and output_mode == ExecuteTransmissionMode.VALUE: + if is_ref and output_mode == ExecuteTransmissionMode.VALUE: res_data = io.FileIO(loc, mode="rb") res_headers = get_href_headers( @@ -796,11 +822,11 @@ def make_result(result, result_id, output_id): return value out_results = {} - for res_id, res_val in results.items(): + for out_id, res_val in results.items(): if isinstance(res_val, list): res_data = [] for out_idx, item in enumerate(res_val): - out_id 
= f"{res_id}.{out_idx}" + res_id = f"{out_id}.{out_idx}" out_res = make_result(item, res_id, out_id) res_data.append(out_res) @@ -815,9 +841,9 @@ def make_result(result, result_id, output_id): ] else: - res_data = make_result(res_val, res_id, res_id) + res_data = make_result(res_val, out_id, out_id) - out_results[res_id] = res_data + out_results[out_id] = res_data return out_results @@ -839,7 +865,7 @@ def get_job_results_multipart(job, results, *, headers, container): def add_result_parts(result_parts): # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MultiPartFieldsType - for res_id, out_id, result in result_parts: + for out_id, res_id, result in result_parts: if isinstance(result, list): sub_parts = [(out_id, f"{out_id}.{out_idx}", data) for out_idx, data in enumerate(result)] sub_parts = add_result_parts(sub_parts) @@ -851,7 +877,7 @@ def add_result_parts(result_parts): "Content-Location": sub_out_url, "Content-Disposition": f"attachment; name=\"{out_id}\"", } - yield out_id, (None, sub_multi, None, sub_headers) + yield res_id, (None, sub_multi, None, sub_headers) key = get_any_value(result, key=True) mode = get_job_output_transmission(job, out_id, is_reference=(key == "href")) @@ -859,7 +885,7 @@ def add_result_parts(result_parts): c_type = res_headers.get("Content-Type") c_loc = res_headers.get("Content-Location") c_fn = os.path.basename(c_loc) if c_loc else None - yield out_id, (c_fn, res_data, c_type, res_headers) + yield res_id, (c_fn, res_data, c_type, res_headers) results_parts = [(_res_id, _res_id, _res_val) for _res_id, _res_val in results.items()] results_parts = list(add_result_parts(results_parts)) From 2946791d6b102c78f103b5e5fd0cc03237836912 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 2 Oct 2024 19:09:56 -0400 Subject: [PATCH 33/75] job multi-results as links inline output transitionMode conversion --- weaver/wps_restapi/jobs/utils.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) 
diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index ba4c349c6..6f1421aab 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -418,13 +418,14 @@ def get_results( # pylint: disable=R1260 as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE res_id = f"{out_id}{val_idx}" if res_multi else out_id - # on-demand convertion to requested transmission mode + # on-demand convertion to requested transmission mode, leave original data/link if not converted if convert_output_transmission: - res_hdr, val_data = generate_or_resolve_result(job, val_item, res_id, out_id, out_mode, settings) - if val_data is not None and is_ref: # data generated from reference + res_hdr, res_data = generate_or_resolve_result(job, val_item, res_id, out_id, out_mode, settings) + if res_data is not None and is_ref: # data generated from reference is_ref = as_ref = False out_key = value_key or "data" # OGC schema overrides after as needed - elif val_data is None and not is_ref: # reference generated from data + val_data = res_data + elif res_data is None and not is_ref: # reference generated from data is_ref = as_ref = True out_key = "href" val_data = res_hdr["Content-Location"] @@ -705,7 +706,6 @@ def generate_or_resolve_result( cid = f"{result_id}@{job.id}" url = None loc = None - typ = None res_data = None c_length = None is_val = key in ["value", "data"] @@ -720,8 +720,13 @@ def generate_or_resolve_result( if is_ref: url = val typ = result.get("type") or ContentType.APP_OCTET_STREAM - loc = map_wps_output_location(val, settings, exists=True, url=False) - # FIXME: fails if output path is the "relative" results '/{jobID}/...' 
+ job_out_url = job.result_path(output_id=output_id) + if url.startswith(f"/{job_out_url}/"): # job "relative" path + out_url = get_wps_output_url(settings) + url = os.path.join(out_url, url[1:]) + loc = map_wps_output_location(url, settings, exists=True, url=False) + else: + typ = result.get("mediaType") or ContentType.TEXT_PLAIN if not url: out_dir = get_wps_output_dir(settings) @@ -730,16 +735,15 @@ def generate_or_resolve_result( loc = os.path.join(out_dir, job_path) url = map_wps_output_location(loc, settings, exists=False, url=True) - if is_val: + if is_val and output_mode == ExecuteTransmissionMode.VALUE: res_data = io.StringIO() c_length = res_data.write(data2str(val)) - typ = result.get("mediaType") or ContentType.TEXT_PLAIN if is_val and output_mode == ExecuteTransmissionMode.REFERENCE: if not os.path.isfile(loc): os.makedirs(os.path.dirname(loc), exist_ok=True) with open(loc, mode="w", encoding="utf-8") as out_file: - out_file.write(val) + out_file.write(data2str(val)) if is_ref and output_mode == ExecuteTransmissionMode.VALUE: res_data = io.FileIO(loc, mode="rb") From 8968c0b0563183c49da378e9c0dbac231d380ec1 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 2 Oct 2024 19:47:51 -0400 Subject: [PATCH 34/75] setup more tests for special case of JSON for both output tranmissionMode and response document --- tests/functional/test_wps_package.py | 223 +++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index e7e6f88c6..4a5cc1831 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -4180,6 +4180,229 @@ def test_execute_single_output_multipart_accept_alt_format(self): assert result_json.content_type == ContentType.APP_JSON assert result_json.text == output_json + # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + @pytest.mark.xfail(reason="not implemented") + def 
test_execute_single_output_response_document_alt_format(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": ContentType.MULTIPART_MIXED, + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "mode": ExecuteMode.SYNC, # force sync to make sure JSON job status is not returned instead + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": { + "transmissionMode": ExecuteTransmissionMode.VALUE, # embed in the part contents + "format": {"mediaType": ContentType.APP_YAML}, # request alternate output format + } + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + # validate the results based on original execution request + results = resp + assert ContentType.MULTIPART_MIXED in results.content_type + boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] + output_json_as_yaml = yaml.safe_dump({"data": "test"}) + results_body = inspect.cleandoc(f""" + --{boundary} + Content-Type: {ContentType.APP_YAML} + Content-ID: + Content-Length: 12 + + {output_json_as_yaml} + --{boundary}-- + """).replace("\n", "\r\n") + results_text = self.remove_result_multipart_variable(results.text) + assert results.content_type.startswith(ContentType.MULTIPART_MIXED) + assert results_text == results_body + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.yml", + "type": ContentType.APP_YAML, + }, + } + + # validate the results can be obtained with the "real" representation + result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) + assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert result_json.content_type == ContentType.APP_JSON + assert result_json.text == output_json + + # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + @pytest.mark.xfail(reason="not implemented") + def test_execute_single_output_response_document_alt_format_json(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + 
self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "mode": ExecuteMode.SYNC, # force sync to make sure JSON job status is not returned instead + "response": ExecuteResponse.DOCUMENT, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": { + # note: + # Default output format is JSON, but request it as plain text. + # Ensure the JSON response contents does not revert it back to nested JSON. + # Expect a literal string containing the embedded JSON. + "transmissionMode": ExecuteTransmissionMode.VALUE, # force convert of the file reference + "format": {"mediaType": ContentType.TEXT_PLAIN}, # force output format explicitly + } + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + # validate the results based on original execution request + results = resp + output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_json": { + "mediaType": ContentType.APP_RAW_JSON, # ensure special type used to distinguish a literal JSON + "value": output_json, + } + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } + + # validate the results can be obtained with the "real" representation + result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) + assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert result_json.content_type == ContentType.APP_JSON + assert result_json.json == {"data": "test"} + + def test_execute_single_output_response_document_default_format_json_special(self): + """ + Validate that a :term:`JSON` output is directly embedded in a ``document`` response also using :term:`JSON`. + + For most types, the data converted from a file reference would be directly embedded as a string + nested under a ``value`` property and provide the associated ``mediaType``. However, given the + same :term:`JSON` representation is used for the entire response contents and the nested contents, + this special case typically expected that the nested :term:`JSON` is not embedded in a string to + facilitate directly parsing the entire response contents as :term:`JSON`. + + .. 
seealso:: + - :func:`test_execute_single_output_response_document_alt_format_json` + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "mode": ExecuteMode.SYNC, # force sync to make sure JSON job status is not returned instead + "response": ExecuteResponse.DOCUMENT, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": { + # note: + # Technically, 'format' does not necessarily need to be specified for this case since + # JSON is the default output format for this result, but specify it for clarity + # (see other test cases that ensure non-JSON by default can be converted). + "transmissionMode": ExecuteTransmissionMode.VALUE, # force convert of the file reference + "format": {"mediaType": ContentType.APP_JSON}, # request output format explicitly + } + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + # validate the results based on original execution request + results = resp + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_json": { + "mediaType": ContentType.APP_JSON, + "value": {"data": "test"}, + } + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/output.yml", + "type": ContentType.APP_YAML, + }, + } + @parameterized.expand([ ContentType.MULTIPART_ANY, ContentType.MULTIPART_MIXED, From 28c9d77e2739e0bd00485cd18ef61f1999b3c4cd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 3 Oct 2024 04:44:03 -0400 Subject: [PATCH 35/75] [wip] more updates for multipart inline convert of data/link representation --- tests/functional/test_wps_package.py | 100 ++++++++++++++++++--------- weaver/cli.py | 20 +++--- weaver/typedefs.py | 5 +- weaver/utils.py | 23 +++++- weaver/wps_restapi/jobs/utils.py | 30 +++++++- 5 files changed, 127 insertions(+), 51 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 4a5cc1831..84a8826bd 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3568,6 +3568,34 @@ def remove_result_multipart_variable(results): results = re.sub(r"Last-Modified: .*\r\n", "", results) return results.strip() + @staticmethod + def fix_result_multipart_indent(results): + # type: (str) -> str + """ + Remove indented whitespace from multipart literal contents. 
+ + This behaves similarly to :func:`inspect.cleandoc`, but handles cases were the nested part contents could + themselves contain newlines, leading to inconsistent indents for some lines when injected by string formating, + and causing :func:`inspect.cleandoc` to fail removing any indent. + + Also, automatically applies ``\r\n`` characters correction which are critical in parsing multipart contents. + This is done to consider that literal newlines will include or not the ``\r`` depending on the OS running tests. + + .. warning:: + This should be used only for literal test string (i.e.: expected value) for comparison against the result. + Result contents obtained from the response should be compared as-is, without any fix for strict checks. + """ + if results.startswith("\n "): + results = results[1:] + res_dedent = results.lstrip() + res_indent = len(results) - len(res_dedent) + res_spaces = " " * res_indent + res_dedent = res_dedent.replace(f"\n{res_spaces}", "\r\n") # indented line + res_dedent = res_dedent.replace(f"\n\r\n", "\r\n\r\n") # empty line (header/body separator) + res_dedent = res_dedent.replace("\r\r", "\r") # in case windows + res_dedent = res_dedent.rstrip("\n ") # last line often indented less because of closing multiline string + return res_dedent + def test_execute_single_output_prefer_header_return_representation_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3605,8 +3633,10 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) assert results.text == "test" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) - assert outputs.json == { - "output_data": {"value": "test"}, + assert outputs.json["outputs"] == { + "output_data": { + "value": "test" + }, } def test_execute_single_output_prefer_header_return_representation_complex(self): @@ -3648,7 +3678,7 @@ 
def test_execute_single_output_prefer_header_return_representation_complex(self) assert results.text == output_json outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) - assert outputs.json == { + assert outputs.json["outputs"] == { "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -3692,8 +3722,10 @@ def test_execute_single_output_prefer_header_return_minimal_literal(self): assert results.text == "test" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) - assert outputs.json == { - "output_data": "test", + assert outputs.json["outputs"] == { + "output_data": { + "value": "test" + }, } def test_execute_single_output_prefer_header_return_minimal_complex(self): @@ -3744,7 +3776,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex(self): ), "Filtered outputs should not be found in results response links." 
outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) - assert outputs.json == { + assert outputs.json["outputs"] == { "output_json": { "href": f"{out_url}/{job_id}/output_json/result.json", "type": ContentType.APP_JSON, @@ -3790,7 +3822,9 @@ def test_execute_single_output_response_raw_value_literal(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, } def test_execute_single_output_response_raw_value_complex(self): @@ -3889,7 +3923,9 @@ def test_execute_single_output_response_raw_reference_literal(self): outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { - "output_data": "test", + "output_data": { + "value": "test" + }, } def test_execute_single_output_response_raw_reference_complex(self): @@ -4004,7 +4040,7 @@ def test_execute_single_output_multipart_accept_data(self): assert ContentType.MULTIPART_MIXED in results.content_type boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json @@ -4012,7 +4048,7 @@ def test_execute_single_output_multipart_accept_data(self): {output_json} --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", 
params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4077,7 +4113,7 @@ def test_execute_single_output_multipart_accept_link(self): results = resp assert ContentType.MULTIPART_MIXED in results.content_type boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} @@ -4086,7 +4122,7 @@ def test_execute_single_output_multipart_accept_link(self): Content-Length: 0 --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4151,7 +4187,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): assert ContentType.MULTIPART_MIXED in results.content_type boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json_as_yaml = yaml.safe_dump({"data": "test"}) - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Type: {ContentType.APP_YAML} Content-ID: @@ -4159,7 +4195,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): {output_json_as_yaml} --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results_text == results_body @@ -4182,7 +4218,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) @pytest.mark.xfail(reason="not implemented") - def test_execute_single_output_response_document_alt_format(self): + def test_execute_single_output_response_document_alt_format_yaml(self): proc = "EchoResultsTester" p_id = 
self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) @@ -4227,7 +4263,7 @@ def test_execute_single_output_response_document_alt_format(self): assert ContentType.MULTIPART_MIXED in results.content_type boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json_as_yaml = yaml.safe_dump({"data": "test"}) - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Type: {ContentType.APP_YAML} Content-ID: @@ -4235,7 +4271,7 @@ def test_execute_single_output_response_document_alt_format(self): {output_json_as_yaml} --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results_text == results_body @@ -4461,7 +4497,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} @@ -4477,7 +4513,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): Content-Length: 0 --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results_text == results_body @@ -4611,10 +4647,10 @@ def test_execute_multi_output_prefer_header_return_representation(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) - results_body = inspect.cleandoc(f""" + 
output_json = repr_json({"data": "test"}, indent=None, separators=(",", ":"), force_string=True) + results_body = self.fix_result_multipart_indent(f""" --{boundary} - Content-Disposition: attachment; filename="output_data.txt"; name="output_data" + Content-Disposition: attachment; name="output_data"; filename="output_data.txt" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 @@ -4629,7 +4665,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): {output_json} --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results_text == results_body @@ -4683,7 +4719,7 @@ def test_execute_multi_output_response_raw_value(self): results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} @@ -4700,7 +4736,7 @@ def test_execute_multi_output_response_raw_value(self): {output_json} --{boundary}-- - """).replace("\n", "\r\n") + """) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results.text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4838,7 +4874,7 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - results_body = inspect.cleandoc(f""" + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_data"; filename="output_data.txt" Content-Type: 
{ContentType.TEXT_PLAIN} @@ -4854,7 +4890,7 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): Content-Length: 0 --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results_text == results_body @@ -4908,8 +4944,8 @@ def test_execute_multi_output_response_raw_mixed(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) - results_body = inspect.cleandoc(f""" + output_json = repr_json({"data": "test"}, indent=None, separators=(",", ":"), force_string=True) + results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} @@ -4933,7 +4969,7 @@ def test_execute_multi_output_response_raw_mixed(self): {output_json} --{boundary}-- - """).replace("\n", "\r\n") + """) results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) assert results_text == results_body @@ -5060,11 +5096,11 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_json = self.remove_result_format(results.json) - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) + output_json = repr_json({"data": "test"}, indent=None, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert results_json == { "output_data": { - "href": f"{out_url}/{job_id}/output_text/result.txt", + "href": f"{out_url}/{job_id}/output_data/output_data.txt", "type": ContentType.TEXT_PLAIN, }, 
"output_json": { diff --git a/weaver/cli.py b/weaver/cli.py index 687449bdc..3d1b910c2 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -51,7 +51,7 @@ import_target, load_file, null, - parse_kvp, + parse_link_header, request_extra, setup_loggers ) @@ -1691,23 +1691,21 @@ def _download_references(self, outputs, out_links, out_dir, job_id, auth=None): # download links from headers LOGGER.debug("%s outputs in results link headers.", "Processing" if len(out_links) else "No") for _, link_header in ResponseHeaders(out_links).items(): - link, params = link_header.split(";", 1) - href = link.strip("<>") - params = parse_kvp(params, multi_value_sep=None, accumulate_keys=False) - ctype = (params.get("type") or [None])[0] # type: str - rel = params["rel"][0].split(".") + link = parse_link_header(link_header) + rel = link["rel"].rsplit(".", 1) output = rel[0] is_array = len(rel) > 1 and str.isnumeric(rel[1]) - ref_path = fetch_reference(href, out_dir, auth=auth, + ref_path = fetch_reference(link["href"], out_dir, auth=auth, out_method=OutputMethod.COPY, out_listing=False) - value = {"href": href, "type": ctype, "path": ref_path, "source": "link"} # type: ExecutionResultObjectRef + link = cast("ExecutionResultObjectRef", link) + link.update({"path": ref_path, "source": "link"}) if output in outputs: if isinstance(outputs[output], dict): # in case 'rel="."' was not employed - outputs[output] = [outputs[output], value] + outputs[output] = [outputs[output], link] else: - outputs[output].append(value) + outputs[output].append(link) else: - outputs[output] = [value] if is_array else value + outputs[output] = [link] if is_array else link return outputs def results(self, diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 58dc1c66f..0a3304fa6 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -113,7 +113,7 @@ JSON = Union[Dict[str, Union[_JSON, _JsonItem]], List[Union[_JSON, _JsonItem]], AnyValueType] Link = TypedDict("Link", { - "title": str, + "title": 
NotRequired[str], "rel": Required[str], "href": Required[str], "hreflang": NotRequired[str], @@ -349,7 +349,8 @@ class CWL_SchemaName(Protocol): WPS_OutputAsRefMediaType = Tuple[str, Optional[bool], Optional[str]] # (output_id, as_ref, mime_type) WPS_OutputRequested = Union[WPS_OutputAsRef, WPS_OutputAsRefMediaType] - KVP_Item = Union[ValueType, Sequence[ValueType]] + KVP_Value = Optional[str] + KVP_Item = Union[KVP_Value, Sequence[KVP_Value]] KVP_Container = Union[Sequence[Tuple[str, KVP_Item]], Dict[str, KVP_Item]] KVP = Dict[str, List[KVP_Item]] diff --git a/weaver/utils.py b/weaver/utils.py index 2c8b0067c..7fbc8794f 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -1321,9 +1321,9 @@ def get_href_headers( if os.path.splitext(path)[-1] in ["", "."]: f_ext = get_extension(f_type, dot=True) path = f"{path}{f_ext}" - content_disposition_params = f"filename=\"{os.path.basename(path)}\"" - if content_name: - content_disposition_params += f"; name=\"{content_name}\"" + # set name, then filename, to align with order employed by requests-toolbelt multipart class + content_disposition_params = f"name=\"{content_name}\"; "if content_name else "" + content_disposition_params += f"filename=\"{os.path.basename(path)}\"" headers["Content-Disposition"] = f"{content_disposition_type}; {content_disposition_params}" f_current = get_file_header_datetime(now()) headers["Date"] = f_current @@ -1371,6 +1371,23 @@ def make_link_header( return link +def parse_link_header(link_header): + # type: (str) -> Link + """ + Parses the parameters of the ``Link`` header. 
+ """ + url, params = link_header.split(";", 1) + href = url.strip("<>") + params = parse_kvp(params, multi_value_sep=None, accumulate_keys=False) + ctype = (params.pop("type", None) or [None])[0] + rel = str(params.pop("rel")[0]) + link = {"href": href, "rel": rel} # type: Link + if ctype and isinstance(ctype, str): + link["type"] = ctype + link.update({param: value[0] for param, value in params.items() if value}) + return link + + def get_base_url(url): # type: (str) -> str """ diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 6f1421aab..cbaa4f8ba 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -54,6 +54,7 @@ get_weaver_url, is_uuid, make_link_header, + parse_link_header ) from weaver.visibility import Visibility from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location @@ -582,6 +583,17 @@ def get_job_results_response( raise_job_dismissed(job, container) raise_job_bad_status(job, container) + # FIXME: if 'return=representation' (any type) without 'transmissionMode' override -> force 'transmissionMode=value' + # (see 'test_execute_multi_output_prefer_header_return_representation') + + # FIXME: if value is JSON with 'response=document' also JSON, auto-load value from ref to embed in body + # - test_execute_single_output_response_document_default_format_json_special + + # FIXME: apply converters (https://github.com/crim-ca/weaver/pull/548) + # - test_execute_single_output_response_document_alt_format_json + # - test_execute_single_output_response_document_alt_format_yaml + # - test_execute_single_output_multipart_accept_alt_format + # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw' # resolution of 'transmissionMode' for document representation will be done by its own handler function # See: @@ -653,11 +665,23 @@ def get_job_results_response( # multipart response if ( - len(results) > 1 or + (len(results) 
+ len(refs)) > 1 or (isinstance(out_data, list) and len(out_data) > 1) or is_accept_multipart ): - return get_job_results_multipart(job, results, headers=headers, container=container) + # backtrack link references that were generated if 'Accept: multipart/*' was omitted + # while using 'response=raw' leading to at least 1 by-value output + # (must force multipart with empty-part for links to respect OGC API - Processes v1.0) + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi + for ref in refs: + ref_link = parse_link_header(ref[-1]) + results[ref_link["rel"]] = ref_link + # attempt sort by original results ordering to generate multipart contents consistently + out_order = list(convert_output_params_schema(job.results, JobInputsOutputsSchema.OGC)) + res_order = {out_id: results[out_id] for out_id in out_order if out_id in results} + res_array = sorted(set(results) - set(res_order)) # in case of 'out.idx' employed for arrays + res_order.update({out_id: results[out_id] for out_id in res_array}) # if missing link arrays + return get_job_results_multipart(job, res_order, headers=headers, container=container) # single value only out_data = out_data[0] if isinstance(out_data, list) else out_data @@ -726,7 +750,7 @@ def generate_or_resolve_result( url = os.path.join(out_url, url[1:]) loc = map_wps_output_location(url, settings, exists=True, url=False) else: - typ = result.get("mediaType") or ContentType.TEXT_PLAIN + typ = get_field(result, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) if not url: out_dir = get_wps_output_dir(settings) From 521d6346ab78f3369a35a0b304647acf91a78bfa Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 3 Oct 2024 14:55:37 -0400 Subject: [PATCH 36/75] fix lookup strategy of value/ref without using hardcoded key names --- weaver/wps_restapi/jobs/utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/weaver/wps_restapi/jobs/utils.py 
b/weaver/wps_restapi/jobs/utils.py index cbaa4f8ba..af1129753 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -725,15 +725,17 @@ def generate_or_resolve_result( If only returned by reference, ``None`` data is returned. An empty-data contents would be an empty string. Therefore, the explicit check of ``None`` is important to identify a by-reference result. """ - key = get_any_value(result, key=True) + key_val = get_any_value(result, key=True, file=False, data=True) + key_ref = get_any_value(result, key=True, file=True, data=False) + key = key_val or key_ref + is_val = bool(key_val) + is_ref = bool(key_ref) val = get_any_value(result) cid = f"{result_id}@{job.id}" url = None loc = None res_data = None c_length = None - is_val = key in ["value", "data"] - is_ref = key in ["href", "reference"] # NOTE: # work with local files (since we have them), to avoid unnecessary loopback request From eabca51ab5fd01572b39bf75f88b267cc08c16d5 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 3 Oct 2024 15:56:23 -0400 Subject: [PATCH 37/75] remove unused var --- weaver/wps_restapi/jobs/utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index af1129753..6a7d09dde 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -725,11 +725,8 @@ def generate_or_resolve_result( If only returned by reference, ``None`` data is returned. An empty-data contents would be an empty string. Therefore, the explicit check of ``None`` is important to identify a by-reference result. 
""" - key_val = get_any_value(result, key=True, file=False, data=True) - key_ref = get_any_value(result, key=True, file=True, data=False) - key = key_val or key_ref - is_val = bool(key_val) - is_ref = bool(key_ref) + is_val = bool(get_any_value(result, key=True, file=False, data=True)) + is_ref = bool(get_any_value(result, key=True, file=True, data=False)) val = get_any_value(result) cid = f"{result_id}@{job.id}" url = None From dc749ded2bf37ec0eb2d71296fb45a67cf410aec Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 3 Oct 2024 19:24:47 -0400 Subject: [PATCH 38/75] [wip] more test cases to handle for job results --- tests/functional/test_wps_package.py | 203 ++++++++++++++++++++++----- weaver/processes/constants.py | 37 ++--- weaver/processes/convert.py | 6 +- weaver/typedefs.py | 3 +- weaver/utils.py | 2 +- weaver/wps_restapi/jobs/utils.py | 76 ++++++++-- 6 files changed, 258 insertions(+), 69 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 84a8826bd..7827db8b3 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3673,9 +3673,8 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) - assert results.text == output_json + assert results.text == "{\"data\":\"test\"}" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { @@ -3685,16 +3684,20 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) }, } - def test_execute_single_output_prefer_header_return_minimal_literal(self): + def 
test_execute_single_output_prefer_header_return_minimal_literal_accept_default(self): + """ + For single requested output, without ``Accept`` content negotiation, its default format is returned directly. + """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Accept": ContentType.ANY, + "Content-Type": ContentType.APP_JSON, } - exec_headers.update(self.json_headers) exec_content = { "inputs": { "message": "test" @@ -3709,14 +3712,17 @@ def test_execute_single_output_prefer_header_return_minimal_literal(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" - # request status instead of results since not expecting 'document' JSON in this case - status_url = resp.json["location"] - status = self.monitor_job(status_url, return_status=True) - assert status["status"] == Status.SUCCEEDED + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" - job_id = status["jobID"] results = self.app.get(f"/jobs/{job_id}/results") assert results.content_type.startswith(ContentType.TEXT_PLAIN) assert results.text == "test" @@ -3728,16 +3734,72 @@ def test_execute_single_output_prefer_header_return_minimal_literal(self): }, } - def test_execute_single_output_prefer_header_return_minimal_complex(self): + def test_execute_single_output_prefer_header_return_minimal_literal_accept_json(self): + """ + For single requested output, with ``Accept`` :term:`JSON` content negotiation, document response is returned. + """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Accept": ContentType.APP_JSON, + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_data": {} # no 'transmissionMode' to auto-resolve 'value' from 'return=minimal' + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_data": "test" + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": { + "value": "test" + }, + } + + def test_execute_single_output_prefer_header_return_minimal_complex_accept_default(self): + """ + For single requested output, without ``Accept`` content negotiation, its default format is returned by link. + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Accept": ContentType.ANY, + "Content-Type": ContentType.APP_JSON, } - exec_headers.update(self.json_headers) exec_content = { "inputs": { "message": "test" @@ -3752,14 +3814,17 @@ def test_execute_single_output_prefer_header_return_minimal_complex(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" - # request status instead of results since not expecting 'document' JSON in this case - status_url = resp.json["location"] - status = self.monitor_job(status_url, return_status=True) - assert status["status"] == Status.SUCCEEDED + # rely on location that should be provided to find 
the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." + ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" - job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" @@ -3783,6 +3848,73 @@ def test_execute_single_output_prefer_header_return_minimal_complex(self): }, } + def test_execute_single_output_prefer_header_return_minimal_complex_accept_json(self): + """ + For single requested output, with ``Accept`` :term:`JSON` content negotiation, document response is returned. + + .. note:: + In this test, the selected output just so happens to be :term:`JSON` as well. + Since it is the ``Accept`` header that is requesting :term:`JSON`, and not a + combination of ``transmissionMode: value`` with :term:`JSON` ``format``, the + contents of ``output_json`` file are **NOT** directly returned in the response. + + .. seealso:: + - :func:`test_execute_single_output_response_raw_value_complex` + for case of embedded ``output_json`` file contents in the response. 
+ """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Accept": ContentType.APP_JSON, + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {} # no 'transmissionMode' to auto-resolve 'reference' from 'return=minimal' + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + results = self.app.get(f"/jobs/{job_id}/results") + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + } + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } + def test_execute_single_output_response_raw_value_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3828,6 +3960,12 @@ def test_execute_single_output_response_raw_value_literal(self): } def test_execute_single_output_response_raw_value_complex(self): + """ + Since value transmission is requested for a single output, its :term:`JSON` contents are returned directly. + + .. 
seealso:: + - :func:`test_execute_single_output_prefer_header_return_minimal_complex_accept_json` + """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) @@ -4285,6 +4423,7 @@ def test_execute_single_output_response_document_alt_format_yaml(self): }, } + # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) # validate the results can be obtained with the "real" representation result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) @@ -4292,16 +4431,14 @@ def test_execute_single_output_response_document_alt_format_yaml(self): assert result_json.content_type == ContentType.APP_JSON assert result_json.text == output_json - # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) - @pytest.mark.xfail(reason="not implemented") - def test_execute_single_output_response_document_alt_format_json(self): + def test_execute_single_output_response_document_alt_format_json_raw_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) exec_headers = { - "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure + "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure "Content-Type": ContentType.APP_JSON, } exec_content = { @@ -4341,12 +4478,11 @@ def test_execute_single_output_response_document_alt_format_json(self): # validate the results based on original execution request results = resp - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert results.json == { "output_json": { "mediaType": ContentType.APP_RAW_JSON, # ensure special 
type used to distinguish a literal JSON - "value": output_json, + "value": "{\"data\":\"test\"}", } } outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4383,7 +4519,7 @@ def test_execute_single_output_response_document_default_format_json_special(sel self.deploy_process(body, process_id=p_id) exec_headers = { - "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure + "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure "Content-Type": ContentType.APP_JSON, } exec_content = { @@ -4434,8 +4570,8 @@ def test_execute_single_output_response_document_default_format_json_special(sel assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { - "href": f"{out_url}/{job_id}/output_json/output.yml", - "type": ContentType.APP_YAML, + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, }, } @@ -4944,7 +5080,6 @@ def test_execute_multi_output_response_raw_mixed(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = repr_json({"data": "test"}, indent=None, separators=(",", ":"), force_string=True) results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_data" @@ -4967,7 +5102,7 @@ def test_execute_multi_output_response_raw_mixed(self): Content-ID: Content-Length: 16 - {output_json} + {{"data":"test"}} --{boundary}-- """) results_text = self.remove_result_multipart_variable(results.text) @@ -5096,7 +5231,6 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_json = self.remove_result_format(results.json) - output_json = 
repr_json({"data": "test"}, indent=None, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert results_json == { "output_data": { @@ -5104,7 +5238,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission "type": ContentType.TEXT_PLAIN, }, "output_json": { - "value": output_json, + "value": {"data": "test"}, "mediaType": ContentType.APP_JSON, }, "output_text": { @@ -5232,7 +5366,6 @@ def test_execute_multi_output_response_document_mixed(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") results_json = self.remove_result_format(results.json) - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert results.content_type.startswith(ContentType.APP_JSON) assert results_json == { "output_data": { @@ -5240,7 +5373,7 @@ def test_execute_multi_output_response_document_mixed(self): "type": ContentType.TEXT_PLAIN, }, "output_json": { - "value": output_json, + "value": {"data": "test"}, "mediaType": ContentType.APP_JSON, }, "output_text": { diff --git a/weaver/processes/constants.py b/weaver/processes/constants.py index 71ab61188..faf892e46 100644 --- a/weaver/processes/constants.py +++ b/weaver/processes/constants.py @@ -353,24 +353,36 @@ class OpenSearchField(Constants): OAS_LITERAL_TYPES ) +ProcessSchemaOGCType = Literal["OGC", "ogc"] +ProcessSchemaOLDType = Literal["OLD", "old"] +ProcessSchemaWPSType = Literal["WPS", "wps"] +ProcessSchemaType = Union[ProcessSchemaOGCType, ProcessSchemaOLDType, ProcessSchemaWPSType] +JobInputsOutputsSchemaType_OGC = Literal["OGC", "ogc"] +JobInputsOutputsSchemaType_OLD = Literal["OLD", "old"] +JobInputsOutputsSchemaType_OGC_STRICT = Literal["OGC+STRICT", "ogc+strict"] +JobInputsOutputsSchemaType_OLD_STRICT = Literal["OLD+STRICT", "old+strict"] +JobInputsOutputsSchemaAnyOGCType = Union[JobInputsOutputsSchemaType_OGC, JobInputsOutputsSchemaType_OGC_STRICT] 
+JobInputsOutputsSchemaAnyOLDType = Union[JobInputsOutputsSchemaType_OLD, JobInputsOutputsSchemaType_OLD_STRICT] +JobInputsOutputsSchemaType = Union[JobInputsOutputsSchemaAnyOGCType, JobInputsOutputsSchemaAnyOLDType] + class ProcessSchema(Constants): """ Schema selector to represent a :term:`Process` description. """ - OGC = "OGC" - OLD = "OLD" - WPS = "WPS" + OGC = "OGC" # type: ProcessSchemaOGCType + OLD = "OLD" # type: ProcessSchemaOLDType + WPS = "WPS" # type: ProcessSchemaWPSType class JobInputsOutputsSchema(Constants): """ Schema selector to represent a :term:`Job` output results. """ - OGC_STRICT = "ogc+strict" - OLD_STRICT = "old+strict" - OGC = "ogc" - OLD = "old" + OGC_STRICT = "ogc+strict" # type: JobInputsOutputsSchemaType_OGC_STRICT + OLD_STRICT = "old+strict" # type: JobInputsOutputsSchemaType_OLD_STRICT + OGC = "ogc" # type: JobInputsOutputsSchemaType_OGC + OLD = "old" # type: JobInputsOutputsSchemaType_OLD if TYPE_CHECKING: @@ -403,14 +415,3 @@ class JobInputsOutputsSchema(Constants): CWL_RequirementToolTimeLimitType, CWL_RequirementWorkReuseType, ] - ProcessSchemaType = Literal["OGC", "ogc", "OLD", "old", "WPS", "wps"] - JobInputsOutputsSchemaType = Literal[ - "ogc+strict", - "OGC+STRICT", - "old+strict", - "OLD+STRICT", - "ogc", - "OGC", - "old", - "OLD", - ] diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 00abd9ea5..e5ad521d6 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -127,6 +127,8 @@ from weaver.processes.constants import ( JobInputsOutputsSchemaType, + JobInputsOutputsSchemaAnyOGCType, + JobInputsOutputsSchemaAnyOLDType, ProcessSchemaType, WPS_DataType, WPS_LiteralData_Type @@ -1910,13 +1912,13 @@ def convert_input_values_schema(inputs, schema): @overload def convert_output_params_schema(inputs, schema): - # type: (Optional[ExecutionOutputs], JobInputsOutputsSchema.OGC) -> Optional[ExecutionOutputsMap] + # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaAnyOGCType) -> 
Optional[ExecutionOutputsMap] ... @overload def convert_output_params_schema(inputs, schema): - # type: (Optional[ExecutionOutputs], JobInputsOutputsSchema.OLD) -> Optional[ExecutionOutputsList] + # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaAnyOLDType) -> Optional[ExecutionOutputsList] ... diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 0a3304fa6..8471456f5 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -546,6 +546,7 @@ class CWL_SchemaName(Protocol): ExecutionOutputObject = TypedDict("ExecutionOutputObject", { "transmissionMode": AnyExecuteTransmissionMode, # type: ignore + "format": NotRequired[JobValueFormat], }, total=False) ExecutionOutputItem = TypedDict("ExecutionOutputItem", { "id": str, @@ -563,7 +564,7 @@ class CWL_SchemaName(Protocol): }, total=False) ExecutionResultObjectValue = TypedDict("ExecutionResultObjectValue", { "value": Optional[AnyValueType], - "type": NotRequired[str], + "mediaType": NotRequired[str], }, total=False) ExecutionResultObject = Union[ExecutionResultObjectRef, ExecutionResultObjectValue] ExecutionResultArray = List[ExecutionResultObject] diff --git a/weaver/utils.py b/weaver/utils.py index 7fbc8794f..c2575c860 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -1521,7 +1521,7 @@ def islambda(func): def get_path_kvp(path, sep=",", **params): - # type: (str, str, **KVP_Item) -> str + # type: (str, str, **AnyValueType) -> str """ Generates the URL with Key-Value-Pairs (:term:`KVP`) query parameters. 
diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 6a7d09dde..13b630f1a 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -82,6 +82,7 @@ HeadersTupleType, HeadersType, JSON, + JobValueFormat, PyramidRequest, SettingsType ) @@ -415,13 +416,15 @@ def get_results( # pylint: disable=R1260 out_key = rtype is_ref = rtype == "href" - out_mode = get_job_output_transmission(job, out_id, is_reference=is_ref) + out_mode, out_fmt = get_job_output_transmission(job, out_id, is_reference=is_ref) as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE res_id = f"{out_id}{val_idx}" if res_multi else out_id # on-demand convertion to requested transmission mode, leave original data/link if not converted if convert_output_transmission: - res_hdr, res_data = generate_or_resolve_result(job, val_item, res_id, out_id, out_mode, settings) + res_hdr, res_data = generate_or_resolve_result( + job, val_item, res_id, out_id, out_mode, out_fmt, settings + ) if res_data is not None and is_ref: # data generated from reference is_ref = as_ref = False out_key = value_key or "data" # OGC schema overrides after as needed @@ -519,20 +522,21 @@ def get_job_return(job=None, body=None, headers=None): def get_job_output_transmission(job, output_id, is_reference): - # type: (Job, str, bool) -> AnyExecuteTransmissionMode + # type: (Job, str, bool) -> Tuple[AnyExecuteTransmissionMode, Optional[JobValueFormat]] """ - Obtain the requested :term:`Job` output ``transmissionMode``. + Obtain the requested :term:`Job` output ``transmissionMode`` and ``format``. 
""" outputs = job.outputs or {} outputs = convert_output_params_schema(outputs, JobInputsOutputsSchema.OGC) out = outputs.get(output_id) or {} - mode = out.get("transmissionMode") + mode = cast("AnyExecuteTransmissionMode", out.get("transmissionMode")) + fmt = cast("JobValueFormat", out.get("format")) # because mode can be omitted, resolve their default explicitly if not mode and is_reference: - return ExecuteTransmissionMode.REFERENCE + return ExecuteTransmissionMode.REFERENCE, fmt if not mode and not is_reference: - return ExecuteTransmissionMode.VALUE - return cast("AnyExecuteTransmissionMode", mode) + return ExecuteTransmissionMode.VALUE, fmt + return mode, fmt def get_job_results_response( @@ -709,6 +713,7 @@ def generate_or_resolve_result( result_id, # type: str output_id, # type: str output_mode, # type: AnyExecuteTransmissionMode + output_format, # type: Optional # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) settings, # type: SettingsType ): # type: (...) -> Tuple[HeadersType, Optional[AnyDataStream]] """ @@ -792,6 +797,49 @@ def generate_or_resolve_result( return res_headers, res_data +def resolve_result_json_literal( + result, # type: ExecutionResultValue + output_format, # type: Optional[str] + content_type=None, # type: Optional[str] + content_encoding=None, # type: Optional[str] +): # type: (...) -> ExecutionResultValue + """ + Generates a :term:`JSON` literal string or object representation according to requested format and result contents. + + If not a ``value`` structure, the result is returned unmodified. If no output ``format`` is provided, or that + the extracted result :term:`Media-Type` does not correspond to a :term:`JSON` value, the result is also unmodified. + Otherwise, string/object representation is resolved according to the relevant :term:`Media-Type`. + + :param result: Container with nested data. + :param output_format: Desired output transmission ``format``, with minimally the :term:`Media-Type`. 
+ :param content_type: Explicit :term:`Media-Type` to employ instead of an embedded ``mediaType`` result property. + :param content_encoding: Explicit data encoding to employ instead of an embedded ``encoding`` result property. + :return: Converted :term:`JSON` data or the original result as applicable. + """ + if not result or not isinstance(result, dict) or "value" not in result: + return result + if not content_type: + content_type = get_field(result, "mediaType", default=None, search_variations=True) + if not content_encoding: + content_encoding = get_field(result, "encoding", default="utf-8", search_variations=True) + if content_type == ContentType.APP_JSON and "value" in result: + is_ascii = str(content_encoding).lower() == "ascii" + out_type = get_field(output_format, "mediaType", default=ContentType.APP_JSON) + if out_type == ContentType.APP_JSON: + result["value"] = repr_json(result["value"], force_string=False, ensure_ascii=is_ascii) + elif out_type in [ContentType.TEXT_PLAIN, ContentType.APP_RAW_JSON]: + result["value"] = repr_json( + result["value"], + force_string=True, + ensure_ascii=is_ascii, + # following for minimal representation + indent=None, + separators=(",", ":"), + ) + result["mediaType"] = ContentType.APP_RAW_JSON # ensure disambiguation from other plain text + return result + + def get_job_results_document(job, results, *, container): # type: (Job, ExecutionResults, Any, AnySettingsContainer) -> ExecutionResults """ @@ -821,8 +869,8 @@ def make_result(result, result_id, output_id): key = "value" val = result result = {"value": val} - mode = get_job_output_transmission(job, result_id, is_reference=(key == "href")) - headers, data = generate_or_resolve_result(job, result, result_id, output_id, mode, settings) + out_mode, out_fmt = get_job_output_transmission(job, result_id, is_reference=(key == "href")) + headers, data = generate_or_resolve_result(job, result, result_id, output_id, out_mode, out_fmt, settings) if data is None: ref = { 
"href": headers["Content-Location"], @@ -846,6 +894,10 @@ def make_result(result, result_id, output_id): } if c_enc: value["encoding"] = c_enc + + # special case of nested JSON data within the JSON document + value = resolve_result_json_literal(value, out_fmt, c_type, c_enc) + return value out_results = {} @@ -907,8 +959,8 @@ def add_result_parts(result_parts): yield res_id, (None, sub_multi, None, sub_headers) key = get_any_value(result, key=True) - mode = get_job_output_transmission(job, out_id, is_reference=(key == "href")) - res_headers, res_data = generate_or_resolve_result(job, result, res_id, out_id, mode, settings) + out_mode, out_fmt = get_job_output_transmission(job, out_id, is_reference=(key == "href")) + res_headers, res_data = generate_or_resolve_result(job, result, res_id, out_id, out_mode, out_fmt, settings) c_type = res_headers.get("Content-Type") c_loc = res_headers.get("Content-Location") c_fn = os.path.basename(c_loc) if c_loc else None From 1ef76d34bd9bc6aa9b5ed84b9006b02ee92296b7 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 4 Oct 2024 01:11:45 -0400 Subject: [PATCH 39/75] [wip] refactor job results to resolve representation after extracting their definitions --- tests/functional/test_wps_package.py | 9 +- weaver/datatype.py | 38 ++++- weaver/processes/execution.py | 6 +- weaver/store/base.py | 3 +- weaver/store/mongodb.py | 4 +- weaver/wps_restapi/jobs/utils.py | 247 +++++++++++++++------------ 6 files changed, 187 insertions(+), 120 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 7827db8b3..9a9944be0 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3673,6 +3673,7 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) job_id = status["jobID"] out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") + assert results.status_code == 200, f"Failed with: 
[{results.status_code}]\nReason:\n{resp.text}" assert results.content_type.startswith(ContentType.APP_JSON) assert results.text == "{\"data\":\"test\"}" outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) @@ -4783,10 +4784,9 @@ def test_execute_multi_output_prefer_header_return_representation(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = repr_json({"data": "test"}, indent=None, separators=(",", ":"), force_string=True) results_body = self.fix_result_multipart_indent(f""" --{boundary} - Content-Disposition: attachment; name="output_data"; filename="output_data.txt" + Content-Disposition: attachment; name="output_data" Content-Type: {ContentType.TEXT_PLAIN} Content-ID: Content-Length: 4 @@ -4799,7 +4799,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): Content-ID: Content-Length: 16 - {output_json} + {{"data":"test"}} --{boundary}-- """) results_text = self.remove_result_multipart_variable(results.text) @@ -4873,8 +4873,9 @@ def test_execute_multi_output_response_raw_value(self): {output_json} --{boundary}-- """) + results_text = self.remove_result_multipart_variable(results.text) assert results.content_type.startswith(ContentType.MULTIPART_MIXED) - assert results.text == results_body + assert results_text == results_body outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { diff --git a/weaver/datatype.py b/weaver/datatype.py index 4f9dc359d..8fd31182b 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -37,7 +37,13 @@ from weaver import xml_util from weaver.exceptions import ProcessInstanceError, ServiceParsingError -from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, 
ExecuteTransmissionMode +from weaver.execute import ( + ExecuteControlOption, + ExecuteMode, + ExecuteResponse, + ExecuteReturnPreference, + ExecuteTransmissionMode +) from weaver.formats import AcceptLanguage, ContentType, OutputFormat, repr_json from weaver.processes.constants import ( CWL_NAMESPACE_WEAVER_ID, @@ -81,7 +87,13 @@ from owslib.wps import WebProcessingService - from weaver.execute import AnyExecuteControlOption, AnyExecuteMode, AnyExecuteResponse, AnyExecuteTransmissionMode + from weaver.execute import ( + AnyExecuteControlOption, + AnyExecuteMode, + AnyExecuteResponse, + AnyExecuteReturnPreference, + AnyExecuteTransmissionMode + ) from weaver.processes.constants import ProcessSchemaType from weaver.processes.types import AnyProcessType from weaver.quotation.status import AnyQuoteStatus @@ -1080,6 +1092,27 @@ def execution_response(self, response): raise ValueError(f"Invalid value for '{self.__name__}.execution_response'. Must be one of {resp}") self["execution_response"] = exec_resp + @property + def execution_return(self): + # type: () -> AnyExecuteReturnPreference + ret = self.setdefault("execution_return", ExecuteReturnPreference.MINIMAL) # almost equivalent to 'document' + if ret not in ExecuteReturnPreference.values(): + ret = ExecuteReturnPreference.MINIMAL + self["execution_return"] = ret + return ret + + @execution_return.setter + def execution_return(self, return_preference): + # type: (Optional[Union[AnyExecuteReturnPreference, str]]) -> None + if return_preference is None: + exec_ret = ExecuteReturnPreference.MINIMAL + else: + exec_ret = ExecuteReturnPreference.get(return_preference) + if exec_ret not in ExecuteReturnPreference: + return_prefs = list(ExecuteReturnPreference.values()) + raise ValueError(f"Invalid value for '{self.__name__}.execution_return'. 
Must be one of {return_prefs}") + self["execution_return"] = exec_ret + @property def is_local(self): # type: () -> bool @@ -1509,6 +1542,7 @@ def params(self): "status_message": self.status_message, "status_location": self.status_location, "execution_response": self.execution_response, + "execution_return": self.execution_return, "execution_mode": self.execution_mode, "is_workflow": self.is_workflow, "created": self.created, diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 3cdb115f7..962dee5cd 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -794,7 +794,7 @@ def submit_job_handler(payload, # type: ProcessExecution # Prefer header not resolved with a valid value should still resume without error is_execute_async = mode != ExecuteMode.SYNC accept_type = validate_job_accept_header(headers, mode) - exec_resp = get_job_return(job=None, body=json_body, headers=headers) # job 'none' since still doing 1st parsing + exec_resp, exec_return = get_job_return(job=None, body=json_body, headers=headers) # job 'None' since still parsing get_header("prefer", headers, pop=True) # don't care about value, just ensure removed with any header container subscribers = map_job_subscribers(json_body, settings) @@ -803,8 +803,8 @@ def submit_job_handler(payload, # type: ProcessExecution store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=Status.ACCEPTED, process=process, service=provider_id, inputs=job_inputs, outputs=job_outputs, is_workflow=is_workflow, is_local=is_local, - execute_async=is_execute_async, execute_response=exec_resp, custom_tags=tags, user_id=user, - access=visibility, context=context, subscribers=subscribers, + execute_async=is_execute_async, execute_response=exec_resp, execute_return=exec_return, + custom_tags=tags, user_id=user, access=visibility, context=context, subscribers=subscribers, accept_type=accept_type, accept_language=language) job.save_log(logger=LOGGER, message="Job 
task submitted for execution.", status=Status.ACCEPTED, progress=0) job = store.update_job(job) diff --git a/weaver/store/base.py b/weaver/store/base.py index 624557117..6f02c95b8 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -12,7 +12,7 @@ from pywps import Process as ProcessWPS from weaver.datatype import Bill, Job, Process, Quote, Service, VaultFile - from weaver.execute import AnyExecuteResponse + from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference from weaver.sort import AnySortType from weaver.status import AnyStatusSearch from weaver.typedefs import ( @@ -176,6 +176,7 @@ def save_job(self, is_local=False, # type: bool execute_async=True, # type: bool execute_response=None, # type: Optional[AnyExecuteResponse] + execute_return=None, # type: Optional[AnyExecuteReturnPreference] custom_tags=None, # type: Optional[List[str]] user_id=None, # type: Optional[int] access=None, # type: Optional[AnyVisibility] diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 568e0df08..28ade6a53 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -63,7 +63,7 @@ from pymongo.collection import Collection - from weaver.execute import AnyExecuteResponse + from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference from weaver.processes.types import AnyProcessType from weaver.sort import AnySortType from weaver.status import AnyStatusSearch @@ -792,6 +792,7 @@ def save_job(self, is_local=False, # type: bool execute_async=True, # type: bool execute_response=None, # type: Optional[AnyExecuteResponse] + execute_return=None, # type: Optional[AnyExecuteReturnPreference] custom_tags=None, # type: Optional[List[str]] user_id=None, # type: Optional[int] access=None, # type: Optional[AnyVisibility] @@ -830,6 +831,7 @@ def save_job(self, "status": map_status(Status.ACCEPTED), "execute_async": execute_async, "execution_response": execute_response, + "execution_return": execute_return, "is_workflow": 
is_workflow, "is_local": is_local, "created": created if created else now(), diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 13b630f1a..78f7e59a0 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -35,7 +35,7 @@ ServiceNotFound ) from weaver.execute import ExecuteResponse, ExecuteTransmissionMode, parse_prefer_header_return, ExecuteReturnPreference -from weaver.formats import ContentType, get_format, repr_json +from weaver.formats import ContentType, get_format, repr_json, ContentEncoding from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field @@ -65,7 +65,7 @@ if TYPE_CHECKING: from typing import Any, Dict, List, Optional, Sequence, Tuple, Union - from weaver.execute import AnyExecuteResponse, AnyExecuteTransmissionMode + from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference, AnyExecuteTransmissionMode from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( AnyDataStream, @@ -75,7 +75,6 @@ AnySettingsContainer, AnyUUID, AnyValueType, - ExecutionResultArray, ExecutionResultObject, ExecutionResults, ExecutionResultValue, @@ -298,7 +297,7 @@ def get_schema_query(schema, strict=True): def make_result_link(result_id, result, job_id, settings): - # type: (str, Union[ExecutionResultObject, ExecutionResultArray], AnyUUID, SettingsType) -> List[str] + # type: (str, ExecutionResultValue, AnyUUID, SettingsType) -> List[str] """ Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission. 
@@ -342,7 +341,6 @@ def get_results( # pylint: disable=R1260 value_key=None, # type: Optional[str] schema=JobInputsOutputsSchema.OLD, # type: Optional[JobInputsOutputsSchemaType] link_references=False, # type: bool - convert_output_transmission=False, # type: bool ): # type: (...) -> Tuple[ExecutionResults, HeadersTupleType] """ Obtains the job results with extended full WPS output URL as applicable and according to configuration settings. @@ -356,11 +354,6 @@ def get_results( # pylint: disable=R1260 Selects which schema to employ for representing the output results (listing or mapping). :param link_references: If enabled, an output that was requested by reference instead of by value will be returned as ``Link`` header. - :param convert_output_transmission: - If disabled (default), data/link representation preserves original results as per their literal/complex type. - If enabled, an output that was requested as reference will be converted as an :term:`URL`, whereas - an output requested by value will be converted to its literal contents, both as needed according to - their original results literal/complex type. :returns: Tuple with: - List or mapping of all outputs each with minimally an ID and value under the requested key. @@ -404,7 +397,7 @@ def get_results( # pylint: disable=R1260 # They must be defined on their own with respective media-type/format details per item. 
else: array = value if isinstance(value, list) else [value] - res_multi = len(array) > 1 + for val_idx, val_item in enumerate(array): val_data = val_item if isinstance(val_item, dict) and isinstance(value, list): @@ -418,21 +411,6 @@ def get_results( # pylint: disable=R1260 is_ref = rtype == "href" out_mode, out_fmt = get_job_output_transmission(job, out_id, is_reference=is_ref) as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE - res_id = f"{out_id}{val_idx}" if res_multi else out_id - - # on-demand convertion to requested transmission mode, leave original data/link if not converted - if convert_output_transmission: - res_hdr, res_data = generate_or_resolve_result( - job, val_item, res_id, out_id, out_mode, out_fmt, settings - ) - if res_data is not None and is_ref: # data generated from reference - is_ref = as_ref = False - out_key = value_key or "data" # OGC schema overrides after as needed - val_data = res_data - elif res_data is None and not is_ref: # reference generated from data - is_ref = as_ref = True - out_key = "href" - val_data = res_hdr["Content-Location"] if is_ref and isinstance(val_data, str): # fix paths relative to instance endpoint, @@ -488,16 +466,16 @@ def get_results( # pylint: disable=R1260 # needed to collect and aggregate outputs of same ID first in case of array # convert any requested link references using indices if needed - headers = [] - for out_id, output in references.items(): - res_links = make_result_link(out_id, output, job.id, settings) - headers.extend([("Link", link) for link in res_links]) + headers = get_job_results_links(job, references, [], settings=settings) return outputs, headers -def get_job_return(job=None, body=None, headers=None): - # type: (Optional[Job], Optional[JSON], Optional[AnyHeadersContainer]) -> AnyExecuteResponse +def get_job_return( + job=None, # type: Optional[Job] + body=None, # type: Optional[JSON] + headers=None, # type: Optional[AnyHeadersContainer] +): # type: (...) 
-> Tuple[AnyExecuteResponse, AnyExecuteReturnPreference] """ Obtain the :term:`Job` result representation based on the resolution order of preferences and request parameters. @@ -508,17 +486,17 @@ def get_job_return(job=None, body=None, headers=None): body = body or {} resp = ExecuteResponse.get(body.get("response")) if resp: - return resp + return resp, ExecuteReturnPreference.MINIMAL pref = parse_prefer_header_return(headers) if pref == ExecuteReturnPreference.MINIMAL: - return ExecuteResponse.DOCUMENT + return ExecuteResponse.DOCUMENT, ExecuteReturnPreference.MINIMAL if pref == ExecuteReturnPreference.REPRESENTATION: - return ExecuteResponse.RAW + return ExecuteResponse.RAW, ExecuteReturnPreference.REPRESENTATION if not job: - return ExecuteResponse.DOCUMENT - return job.execution_response + return ExecuteResponse.DOCUMENT, ExecuteReturnPreference.MINIMAL + return job.execution_response, job.execution_return def get_job_output_transmission(job, output_id, is_reference): @@ -529,14 +507,24 @@ def get_job_output_transmission(job, output_id, is_reference): outputs = job.outputs or {} outputs = convert_output_params_schema(outputs, JobInputsOutputsSchema.OGC) out = outputs.get(output_id) or {} - mode = cast("AnyExecuteTransmissionMode", out.get("transmissionMode")) - fmt = cast("JobValueFormat", out.get("format")) + out_mode = cast("AnyExecuteTransmissionMode", out.get("transmissionMode")) + out_fmt = cast("JobValueFormat", out.get("format")) + + # raw/representation can change the output transmission mode if they are not overriding it + # document/minimal return is not checked, since it is our default, and will resolve as such anyway + if ( + not out_mode and + job.execution_return == ExecuteReturnPreference.REPRESENTATION and + job.execution_response == ExecuteResponse.RAW + ): + return ExecuteTransmissionMode.VALUE, out_fmt + # because mode can be omitted, resolve their default explicitly - if not mode and is_reference: - return ExecuteTransmissionMode.REFERENCE, 
fmt - if not mode and not is_reference: - return ExecuteTransmissionMode.VALUE, fmt - return mode, fmt + if not out_mode and is_reference: + return ExecuteTransmissionMode.REFERENCE, out_fmt + if not out_mode and not is_reference: + return ExecuteTransmissionMode.VALUE, out_fmt + return out_mode, out_fmt def get_job_results_response( @@ -586,35 +574,21 @@ def get_job_results_response( """ raise_job_dismissed(job, container) raise_job_bad_status(job, container) - - # FIXME: if 'return=representation' (any type) without 'transmissionMode' override -> force 'transmissionMode=value' - # (see 'test_execute_multi_output_prefer_header_return_representation') - - # FIXME: if value is JSON with 'response=document' also JSON, auto-load value from ref to embed in body - # - test_execute_single_output_response_document_default_format_json_special + settings = get_settings(container) # FIXME: apply converters (https://github.com/crim-ca/weaver/pull/548) # - test_execute_single_output_response_document_alt_format_json # - test_execute_single_output_response_document_alt_format_yaml # - test_execute_single_output_multipart_accept_alt_format - # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw' - # resolution of 'transmissionMode' for document representation will be done by its own handler function - # See: - # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) - # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document - is_raw = get_job_return(job, results_contents, results_headers) == ExecuteResponse.RAW - # when multipart is requested explicitly, do NOT use 'link_references' at this point - # this is to simplify multipart content generation by grouping everything under a single 'results' container - is_accept_multipart = ( - isinstance(job.accept_type, str) and - any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART) - ) + # FIXME: 
remove any 'refs' not needed anymore results, refs = get_results( - job, container, value_key="value", + job, container, + value_key="value", schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details - link_references=is_raw and not is_accept_multipart, - convert_output_transmission=is_raw and not is_accept_multipart, + # no link headers since they are represented differently based on request parameters + # leave it up to each following content-type/response specific representation to define them + link_references=False, ) headers = ResponseHeaders(headers or {}) @@ -624,6 +598,15 @@ def get_job_results_response( link_header = make_link_header(link) headers.add("Link", link_header) + # resolve request details to redirect for appropriate response handlers + job_resp, job_ret = get_job_return(job, results_contents, results_headers) + is_accept_multipart = ( + isinstance(job.accept_type, str) and + any(ctype in job.accept_type for ctype in ContentType.ANY_MULTIPART) + ) + is_rep = job_ret == ExecuteReturnPreference.REPRESENTATION + is_raw = job_resp == ExecuteResponse.RAW + if not is_raw and not is_accept_multipart: try: results_schema = sd.ResultsDocument() @@ -646,9 +629,12 @@ def get_job_results_response( }) ) + # resolution of 'transmissionMode' for document representation will be done by its own handler function + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document # use deserialized contents such that only the applicable fields remain # (simplify compares, this is assumed by the following call) - results_json = get_job_results_document(job, results_json, container=container) + results_json = get_job_results_document(job, results_json, settings=settings) headers.extend(refs) return HTTPOk(json=results_json, headers=headers) @@ -656,53 +642,64 @@ def get_job_results_response( # Status code 204 for empty 
body # see: # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref + # - https://docs.ogc.org/DRAFTS/18-062.html#req_core_job-results-param-outputs-empty headers.extend(refs) return HTTPNoContent(headers=headers) # raw response can be data-only value, link-only or a mix of them if results: - # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one - out_vals = list(results.items()) # type: List[Tuple[str, ExecutionResultValue]] # noqa - out_info = out_vals[0][-1] # type: ExecutionResultValue - out_type = get_any_value(out_info, key=True) - out_data = get_any_value(out_info) + # if raw representation is requested and all requested outputs resolve as links + # without explicit 'accept: multipart', then all must use link headers + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref + res_refs = { + out_id: bool(get_any_value(out, key=True, file=True, data=True)) + for out_id, out in results.items() + } + out_refs = { + out_id: get_job_output_transmission(job, out_id, is_ref) == ExecuteTransmissionMode.REFERENCE + for out_id, is_ref in res_refs.items() + } + if is_raw and not is_rep and all(is_ref for is_ref in out_refs.values()): + headers = get_job_results_links(job, results, headers, settings=settings) + return HTTPNoContent(headers=headers) # multipart response + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi + # - https://docs.ogc.org/DRAFTS/18-062.html#per_core_job-results-async-many-other-formats + # extract data to see if it happens to be an array (i.e.: 1 output "technically", but needs multipart) + out_vals = list(results.items()) # type: List[Tuple[str, ExecutionResultValue]] # noqa + out_info = out_vals[0][-1] # type: ExecutionResultValue + out_data = get_any_value(out_info) if ( (len(results) + len(refs)) 
> 1 or - (isinstance(out_data, list) and len(out_data) > 1) or + (isinstance(out_data, list) and len(out_data) > 1) or # single output is an array, needs multipart is_accept_multipart ): + # FIXME: remove links backtrack not needed anymore - pass results directly # backtrack link references that were generated if 'Accept: multipart/*' was omitted # while using 'response=raw' leading to at least 1 by-value output # (must force multipart with empty-part for links to respect OGC API - Processes v1.0) - # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi - for ref in refs: - ref_link = parse_link_header(ref[-1]) - results[ref_link["rel"]] = ref_link - # attempt sort by original results ordering to generate multipart contents consistently - out_order = list(convert_output_params_schema(job.results, JobInputsOutputsSchema.OGC)) - res_order = {out_id: results[out_id] for out_id in out_order if out_id in results} - res_array = sorted(set(results) - set(res_order)) # in case of 'out.idx' employed for arrays - res_order.update({out_id: results[out_id] for out_id in res_array}) # if missing link arrays - return get_job_results_multipart(job, res_order, headers=headers, container=container) - - # single value only - out_data = out_data[0] if isinstance(out_data, list) else out_data - if out_type == "href": - out_path = map_wps_output_location(out_data, container, exists=True, url=False) - out_type = out_info.get("type") # noqa - out_headers = get_href_headers(out_path, download_headers=True, content_headers=True, content_type=out_type) - resp = FileResponse(out_path) - resp.headers.update(out_headers) - resp.headers.update(headers) - else: - resp = HTTPOk(body=out_data, charset="UTF-8", content_type=ContentType.TEXT_PLAIN, headers=headers) + # for ref in refs: + # ref_link = parse_link_header(ref[-1]) + # results[ref_link["rel"]] = ref_link + # # attempt sort by original results ordering to generate multipart contents consistently + # 
out_order = list(convert_output_params_schema(job.results, JobInputsOutputsSchema.OGC)) + # res_order = {out_id: results[out_id] for out_id in out_order if out_id in results} + # res_array = sorted(set(results) - set(res_order)) # in case of 'out.idx' employed for arrays + # res_order.update({out_id: results[out_id] for out_id in res_array}) # if missing link arrays + #return get_job_results_multipart(job, res_order, headers=headers, settings=settings) + return get_job_results_multipart(job, results, headers=headers, settings=settings) + + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one + res_id = out_vals[0][0] + return get_job_results_single(job, res_id, out_info, headers, container=settings) + + # FIXME: this else is impossible, remove 'if results' above and dedent else: resp = HTTPOk(headers=headers) if refs: - # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref - # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi + resp.headerlist.extend(refs) return resp @@ -747,7 +744,8 @@ def generate_or_resolve_result( if is_ref: url = val - typ = result.get("type") or ContentType.APP_OCTET_STREAM + typ = result.get("type") # expected for typical link, but also check media-type variants in case pre-converted + typ = typ or get_field(result, "mime_type", search_variations=True, default=ContentType.APP_OCTET_STREAM) job_out_url = job.result_path(output_id=output_id) if url.startswith(f"/{job_out_url}/"): # job "relative" path out_url = get_wps_output_url(settings) @@ -840,8 +838,41 @@ def resolve_result_json_literal( return result -def get_job_results_document(job, results, *, container): - # type: (Job, ExecutionResults, Any, AnySettingsContainer) -> ExecutionResults +def get_job_results_links(job, references, headers, *, settings): + # type: (Job, Dict[str, ExecutionResultValue], AnyHeadersContainer, Any, SettingsType) -> AnyHeadersContainer + for 
out_id, output in references.items(): + res_links = make_result_link(out_id, output, job.id, settings) + headers.extend([("Link", link) for link in res_links]) + return headers + + +def get_job_results_single(job, output_id, result, headers, *, container): + # type: (Job, str, ExecutionResultObject, AnyHeadersContainer, Any, AnySettingsContainer) -> Union[HTTPOk, HTTPNoContent] + + settings = get_settings(container) + is_ref = bool(get_any_value(result, key=True, file=True, data=False)) + out_data = get_any_value(result, file=is_ref, data=not is_ref) + out_mode, out_fmt = get_job_output_transmission(job, output_id, is_ref) + # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + # for .../results/{id} transform might need to force 'Prefer' over job preference + # (explicitly request value/link contrary to resolved results/mode from the job) + if out_mode == ExecuteTransmissionMode.REFERENCE: + # FIXME: add transform for requested output format (https://github.com/crim-ca/weaver/pull/548) + # req_fmt = guess_target_format(container) where container=request + # out_fmt (see above) + # out_type = result.get("type") + # out_select = req_fmt or out_fmt or out_type (resolution order/precedence) + link = make_result_link(output_id, result, job.id, settings) + headers.extend([("Link", link[0])]) + return HTTPNoContent(headers=headers) + + # FIXME: add transform for requested output format (https://github.com/crim-ca/weaver/pull/548) + # FIXME: if encoding, use 'ContentEncoding.encode()' + relevant headers for returning that data representation + return HTTPOk(body=out_data, charset="UTF-8", content_type=ContentType.TEXT_PLAIN, headers=headers) + + +def get_job_results_document(job, results, *, settings): + # type: (Job, ExecutionResults, Any, SettingsType) -> ExecutionResults """ Generates the :term:`Job` results document response from available or requested outputs with necessary conversions. 
@@ -858,7 +889,6 @@ def get_job_results_document(job, results, *, container): This function assumes that schema deserialization was applied beforehand. Therefore, it will not attempt matching every possible combination of the results representation. """ - settings = get_settings(container) def make_result(result, result_id, output_id): # type: (ExecutionResultValue, str, str) -> Union[AnyValueType, ExecutionResultObject] @@ -879,7 +909,7 @@ def make_result(result, result_id, output_id): return ref c_type = headers.get("Content-Type") or "" - c_enc = headers.get("Content-Encoding") + c_enc = ContentEncoding.get(headers.get("Content-Encoding")) if not c_type or ( # note: # Explicit content-type check to consider that any additional parameter provided @@ -888,12 +918,12 @@ def make_result(result, result_id, output_id): ): value = val # use original to avoid string conversion else: - value = { - "value": data2str(data), - "mediaType": c_type, - } + value = {"mediaType": c_type} + data = data2str(data) if c_enc: + data = ContentEncoding.encode(data, c_enc) value["encoding"] = c_enc + value["value"] = data # special case of nested JSON data within the JSON document value = resolve_result_json_literal(value, out_fmt, c_type, c_enc) @@ -926,8 +956,8 @@ def make_result(result, result_id, output_id): return out_results -def get_job_results_multipart(job, results, *, headers, container): - # type: (Job, ExecutionResults, Any, AnyHeadersContainer, AnySettingsContainer) -> HTTPOk +def get_job_results_multipart(job, results, *, headers, settings): + # type: (Job, ExecutionResults, Any, AnyHeadersContainer, SettingsType) -> HTTPOk """ Generates the :term:`Job` results multipart response from available or requested outputs with necessary conversions. @@ -938,9 +968,8 @@ def get_job_results_multipart(job, results, *, headers, container): :param job: Job definition with potential metadata about requested outputs. 
:param results: Pre-filtered and pre-processed results in a normalized format structure. :param headers: Additional headers to include in the response. - :param container: Application settings to resolve locations. + :param settings: Application settings to resolve locations. """ - settings = get_settings(container) def add_result_parts(result_parts): # type: (List[Tuple[str, str, ExecutionResultObject]]) -> MultiPartFieldsType From 7705c9b0e3993158b5edbc5f8b86880a77e15d09 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 4 Oct 2024 15:40:12 -0400 Subject: [PATCH 40/75] fix json multipart embedded contents without newline --- .../application-packages/EchoResultsTester/package.cwl | 3 ++- tests/functional/test_wps_package.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/functional/application-packages/EchoResultsTester/package.cwl b/tests/functional/application-packages/EchoResultsTester/package.cwl index 37018f54f..5dc08d5b7 100644 --- a/tests/functional/application-packages/EchoResultsTester/package.cwl +++ b/tests/functional/application-packages/EchoResultsTester/package.cwl @@ -6,9 +6,10 @@ requirements: dockerPull: "debian:stretch-slim" InlineJavascriptRequirement: {} InitialWorkDirRequirement: + # note: use '>-' to avoid newline after the JSON contents in the generated file, tests validate that explicitly listing: - entryname: result.json - entry: | + entry: >- {"data":"$(inputs.message)"} - entryname: result.txt entry: | diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 9a9944be0..8515d209b 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -4797,7 +4797,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: - Content-Length: 16 + Content-Length: 15 {{"data":"test"}} --{boundary}-- @@ 
-4868,7 +4868,7 @@ def test_execute_multi_output_response_raw_value(self): Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: - Content-Length: 16 + Content-Length: 15 {output_json} --{boundary}-- @@ -5101,7 +5101,7 @@ def test_execute_multi_output_response_raw_mixed(self): Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: - Content-Length: 16 + Content-Length: 15 {{"data":"test"}} --{boundary}-- From 4d3a89a8519ec626f3466cffeb38a8c347ce914c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 5 Oct 2024 02:31:50 -0400 Subject: [PATCH 41/75] job response refactor to handle single-output with similar strategy as multi-output variants --- tests/functional/test_wps_package.py | 21 ++-- weaver/processes/execution.py | 2 +- weaver/wps_restapi/jobs/jobs.py | 2 +- weaver/wps_restapi/jobs/utils.py | 163 +++++++++++++++++---------- 4 files changed, 113 insertions(+), 75 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 8515d209b..bddc035d5 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3998,17 +3998,17 @@ def test_execute_single_output_response_raw_value_complex(self): status = self.monitor_job(status_url, return_status=True) assert status["status"] == Status.SUCCEEDED + out_url = get_wps_output_url(self.settings) job_id = status["jobID"] results = self.app.get(f"/jobs/{job_id}/results") assert results.content_type.startswith(ContentType.APP_JSON) assert results.json == {"data": "test"} outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert outputs.content_type.startswith(ContentType.APP_JSON) assert outputs.json["outputs"] == { "output_json": { - "value": output_json, - "mediaType": 
ContentType.APP_JSON, + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, }, } @@ -4049,7 +4049,8 @@ def test_execute_single_output_response_raw_reference_literal(self): results = self.app.get(f"/jobs/{job_id}/results") results_href = f"{self.url}/processes/{p_id}/jobs/{job_id}/results" output_data_href = f"{out_url}/{job_id}/output_data/output_data.txt" - output_data_link = f"<{output_data_href}>; rel=\"output_data\"; type=\"{ContentType.TEXT_PLAIN}\"" + output_data_args = f"; rel=\"output_data\"; type=\"{ContentType.TEXT_PLAIN}\"" + output_data_link = f"<{output_data_href}>{output_data_args}" assert results.status_code == 204, "No contents expected for minimal reference result." assert results.body == b"" assert results.content_type is None @@ -4178,14 +4179,13 @@ def test_execute_single_output_multipart_accept_data(self): results = resp assert ContentType.MULTIPART_MIXED in results.content_type boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: - {output_json} + {{{"data":"test"}}} --{boundary}-- """) results_text = self.remove_result_multipart_variable(results.text) @@ -4350,10 +4350,9 @@ def test_execute_single_output_multipart_accept_alt_format(self): # validate the results can be obtained with the "real" representation result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" assert result_json.content_type == ContentType.APP_JSON - assert result_json.text == output_json + assert result_json.text == "{\"data\":\"test\"}" # FIXME: 
implement (https://github.com/crim-ca/weaver/pull/548) @pytest.mark.xfail(reason="not implemented") @@ -4427,10 +4426,9 @@ def test_execute_single_output_response_document_alt_format_yaml(self): # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) # validate the results can be obtained with the "real" representation result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" assert result_json.content_type == ContentType.APP_JSON - assert result_json.text == output_json + assert result_json.text == "{\"data\":\"test\"}" def test_execute_single_output_response_document_alt_format_json_raw_literal(self): proc = "EchoResultsTester" @@ -4854,7 +4852,6 @@ def test_execute_multi_output_response_raw_value(self): out_url = get_wps_output_url(self.settings) results = self.app.get(f"/jobs/{job_id}/results") boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] - output_json = repr_json({"data": "test"}, separators=(",", ":"), force_string=True) results_body = self.fix_result_multipart_indent(f""" --{boundary} Content-Disposition: attachment; name="output_data" @@ -4870,7 +4867,7 @@ def test_execute_multi_output_response_raw_value(self): Content-ID: Content-Length: 15 - {output_json} + {{{"data":"test"}}} --{boundary}-- """) results_text = self.remove_result_multipart_variable(results.text) diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 962dee5cd..3edd2af22 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -826,7 +826,7 @@ def submit_job_handler(payload, # type: ProcessExecution # when sync is successful, it must return the results direct instead of status info # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response if job.status == Status.SUCCEEDED: - 
return get_job_results_response(job, settings, headers=resp_headers) + return get_job_results_response(job, headers=resp_headers, container=settings) # otherwise return the error status body = job.json(container=settings) body["location"] = location_url diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 749a338b1..b2921ca9c 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -418,7 +418,7 @@ def get_job_results(request): Retrieve the results of a job. """ job = get_job(request) - resp = get_job_results_response(job, request) + resp = get_job_results_response(job, container=request) return resp diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 78f7e59a0..29c872d0f 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -17,7 +17,6 @@ HTTPNotFound, HTTPOk ) -from pyramid.response import FileResponse from pyramid_celery import celery_app from requests_toolbelt.multipart.encoder import MultipartEncoder from webob.headers import ResponseHeaders @@ -53,8 +52,7 @@ get_settings, get_weaver_url, is_uuid, - make_link_header, - parse_link_header + make_link_header ) from weaver.visibility import Visibility from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location @@ -66,6 +64,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference, AnyExecuteTransmissionMode + from weaver.formats import AnyContentEncoding from weaver.processes.constants import JobInputsOutputsSchemaType from weaver.typedefs import ( AnyDataStream, @@ -73,7 +72,6 @@ AnyRequestType, AnyResponseType, AnySettingsContainer, - AnyUUID, AnyValueType, ExecutionResultObject, ExecutionResults, @@ -296,8 +294,15 @@ def get_schema_query(schema, strict=True): return schema_checked -def make_result_link(result_id, result, job_id, settings): - # type: (str, 
ExecutionResultValue, AnyUUID, SettingsType) -> List[str] +def make_result_link( + job, # type: Job + result, # type: ExecutionResultValue + output_id, # type: str + output_mode, # type: AnyExecuteTransmissionMode + output_format=None, # type: Optional[JobValueFormat] + *, # type: Any + settings, # type: SettingsType +): # type: (...) -> List[str] """ Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission. @@ -306,31 +311,14 @@ def make_result_link(result_id, result, job_id, settings): """ values = result if isinstance(result, list) else [result] suffixes = list(f".{idx}" for idx in range(len(values))) if isinstance(result, list) else [""] - wps_url = get_wps_output_url(settings).strip("/") links = [] for suffix, value in zip(suffixes, values): - key = get_any_value(result, key=True) - if key != "href": - # literal data to be converted to link - # plain text file must be created containing the raw literal data - typ = ContentType.TEXT_PLAIN # as per '/rec/core/process-execute-sync-document-ref' - enc = "UTF-8" - out = get_wps_output_dir(settings) - val = get_any_value(value, data=True, file=False) - loc = os.path.join(str(job_id), f"{result_id}{suffix}.txt") - url = f"{wps_url}/{loc}" - path = os.path.join(out, loc) - path = get_secure_path(path) - with open(path, mode="w", encoding=enc) as out_file: - out_file.write(val) - else: - fmt = get_field(result, "format", default={"mediaType": ContentType.TEXT_PLAIN}) - typ = get_field(fmt, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) - enc = get_field(fmt, "encoding", search_variations=True, default=None) - url = get_any_value(value, data=False, file=True) # should already include full path - if fmt == ContentType.TEXT_PLAIN and not enc: # only if text, otherwise binary content could differ - enc = "UTF-8" # default both omit/empty - link_header = make_link_header(url, rel=f"{result_id}{suffix}", type=typ, charset=enc) + result_id = 
f"{output_id}{suffix}" + headers, _ = generate_or_resolve_result(job, result, result_id, output_id, output_mode, output_format, settings) + url = headers["Content-Location"] + typ = headers["Content-Type"] + enc = headers.get("Content-Encoding", None) + link_header = make_link_header(url, rel=result_id, type=typ, charset=enc) links.append(link_header) return links @@ -466,7 +454,7 @@ def get_results( # pylint: disable=R1260 # needed to collect and aggregate outputs of same ID first in case of array # convert any requested link references using indices if needed - headers = get_job_results_links(job, references, [], settings=settings) + headers = get_job_results_links(job, references, {}, headers=[], settings=settings) return outputs, headers @@ -520,17 +508,15 @@ def get_job_output_transmission(job, output_id, is_reference): return ExecuteTransmissionMode.VALUE, out_fmt # because mode can be omitted, resolve their default explicitly - if not out_mode and is_reference: - return ExecuteTransmissionMode.REFERENCE, out_fmt - if not out_mode and not is_reference: - return ExecuteTransmissionMode.VALUE, out_fmt + if not out_mode: + out_mode = ExecuteTransmissionMode.REFERENCE if is_reference else ExecuteTransmissionMode.VALUE return out_mode, out_fmt def get_job_results_response( job, # type: Job - container, # type: AnySettingsContainer *, # type: Any + container, # type: AnySettingsContainer headers=None, # type: Optional[AnyHeadersContainer] results_headers=None, # type: Optional[AnyHeadersContainer] results_contents=None, # type: Optional[JSON] @@ -652,15 +638,18 @@ def get_job_results_response( # without explicit 'accept: multipart', then all must use link headers # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref res_refs = { - out_id: bool(get_any_value(out, key=True, file=True, data=True)) + out_id: bool(get_any_value(out, key=True, file=True, data=False)) for out_id, out in results.items() } - out_refs = { - out_id: 
get_job_output_transmission(job, out_id, is_ref) == ExecuteTransmissionMode.REFERENCE + out_transmissions = { + out_id: get_job_output_transmission(job, out_id, is_ref) for out_id, is_ref in res_refs.items() } - if is_raw and not is_rep and all(is_ref for is_ref in out_refs.values()): - headers = get_job_results_links(job, results, headers, settings=settings) + if is_raw and not is_rep and all( + out_mode == ExecuteTransmissionMode.REFERENCE + for out_mode, _ in out_transmissions.values() + ): + headers = get_job_results_links(job, results, out_transmissions, headers=headers, settings=settings) return HTTPNoContent(headers=headers) # multipart response @@ -693,7 +682,13 @@ def get_job_results_response( # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one res_id = out_vals[0][0] - return get_job_results_single(job, res_id, out_info, headers, container=settings) + # FIXME: add transform for requested output format (https://github.com/crim-ca/weaver/pull/548) + # req_fmt = guess_target_format(container) where container=request + # out_fmt (see above) + # out_type = result.get("type") + # out_select = req_fmt or out_fmt or out_type (resolution order/precedence) + out_fmt = None + return get_job_results_single(job, out_info, res_id, out_fmt, headers=headers, settings=settings) # FIXME: this else is impossible, remove 'if results' above and dedent else: @@ -710,7 +705,7 @@ def generate_or_resolve_result( result_id, # type: str output_id, # type: str output_mode, # type: AnyExecuteTransmissionMode - output_format, # type: Optional # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + output_format, # type: Optional[JobValueFormat] # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) settings, # type: SettingsType ): # type: (...) 
-> Tuple[HeadersType, Optional[AnyDataStream]] """ @@ -721,6 +716,7 @@ def generate_or_resolve_result( :param result_id: Specific identifier of the result, including any array index as applicable. :param output_id: Generic identifier of the output containing the result. :param output_mode: Desired output transmission mode. + :param output_format: Desired output transmission ``format``, with minimally the :term:`Media-Type`. :param settings: Application settings to resolve locations. :return: Resolved headers and data (as applicable) for the result. @@ -751,6 +747,7 @@ def generate_or_resolve_result( out_url = get_wps_output_url(settings) url = os.path.join(out_url, url[1:]) loc = map_wps_output_location(url, settings, exists=True, url=False) + loc = get_secure_path(loc) else: typ = get_field(result, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) @@ -759,6 +756,7 @@ def generate_or_resolve_result( out_name = f"{result_id}.txt" job_path = job.result_path(output_id=output_id, file_name=out_name) loc = os.path.join(out_dir, job_path) + loc = get_secure_path(loc) url = map_wps_output_location(loc, settings, exists=False, url=True) if is_val and output_mode == ExecuteTransmissionMode.VALUE: @@ -797,7 +795,7 @@ def generate_or_resolve_result( def resolve_result_json_literal( result, # type: ExecutionResultValue - output_format, # type: Optional[str] + output_format, # type: Optional[JobValueFormat] content_type=None, # type: Optional[str] content_encoding=None, # type: Optional[str] ): # type: (...) 
-> ExecutionResultValue @@ -838,37 +836,80 @@ def resolve_result_json_literal( return result -def get_job_results_links(job, references, headers, *, settings): - # type: (Job, Dict[str, ExecutionResultValue], AnyHeadersContainer, Any, SettingsType) -> AnyHeadersContainer +def get_job_results_links( + job, # type: Job + references, # type: Dict[str, ExecutionResultValue] + transmissions, # type: Dict[str, Tuple[AnyExecuteTransmissionMode, JobValueFormat]] + headers, # type: AnyHeadersContainer + *, # type: Any + settings, # type: SettingsType +): # type: (...) -> AnyHeadersContainer + """ + Generates ``Link`` headers for all specified result references and adds them to the specified header container. + """ for out_id, output in references.items(): - res_links = make_result_link(out_id, output, job.id, settings) + out_trans = transmissions.get(out_id) + out_fmt = out_trans[-1] if out_trans else None + out_mode = ExecuteTransmissionMode.REFERENCE + res_links = make_result_link(job, output, out_id, out_mode, out_fmt, settings=settings) headers.extend([("Link", link) for link in res_links]) return headers -def get_job_results_single(job, output_id, result, headers, *, container): - # type: (Job, str, ExecutionResultObject, AnyHeadersContainer, Any, AnySettingsContainer) -> Union[HTTPOk, HTTPNoContent] +def get_job_results_single( + job, # type: Job + result, # type: ExecutionResultObject + output_id, # type: str + output_format, # type: Optional[JobValueFormat] + headers, # type: AnyHeadersContainer + *, # type: Any + settings, # type: AnySettingsContainer +): # type: (...) -> Union[HTTPOk, HTTPNoContent] + """ + Generates a single result response according to specified or resolved output transmission and format. 
- settings = get_settings(container) - is_ref = bool(get_any_value(result, key=True, file=True, data=False)) - out_data = get_any_value(result, file=is_ref, data=not is_ref) - out_mode, out_fmt = get_job_output_transmission(job, output_id, is_ref) + :param job: Job definition to obtain relevant path resolution. + :param result: Result to be represented. + :param output_id: Identifier of the corresponding result output. + :param output_format: Desired output format for conversion, as applicable. + :param headers: Additional headers to include in the response. + :param settings: Application settings to resolve locations. + :return: + """ # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) # for .../results/{id} transform might need to force 'Prefer' over job preference # (explicitly request value/link contrary to resolved results/mode from the job) + + is_ref = bool(get_any_value(result, key=True, file=True, data=False)) + out_data = get_any_value(result, file=is_ref, data=not is_ref) + out_mode, out_fmt = get_job_output_transmission(job, output_id, is_ref) if out_mode == ExecuteTransmissionMode.REFERENCE: - # FIXME: add transform for requested output format (https://github.com/crim-ca/weaver/pull/548) - # req_fmt = guess_target_format(container) where container=request - # out_fmt (see above) - # out_type = result.get("type") - # out_select = req_fmt or out_fmt or out_type (resolution order/precedence) - link = make_result_link(output_id, result, job.id, settings) + link = make_result_link(job, result, output_id, out_mode, output_format, settings=settings) headers.extend([("Link", link[0])]) return HTTPNoContent(headers=headers) - # FIXME: add transform for requested output format (https://github.com/crim-ca/weaver/pull/548) - # FIXME: if encoding, use 'ContentEncoding.encode()' + relevant headers for returning that data representation - return HTTPOk(body=out_data, charset="UTF-8", content_type=ContentType.TEXT_PLAIN, headers=headers) + # convert value
as needed since reference transmission was not requested/resolved + out_headers = {} + if is_ref: + output_mode = ExecuteTransmissionMode.VALUE + out_headers, out_data = generate_or_resolve_result( + job, + result, + output_id, + output_id, + output_mode, + output_format, + settings=settings, + ) + headers.update(out_headers) + + ctype = out_headers.get("Content-Type") + if not ctype: + ctype = get_field(result, "mediaType", search_variations=True, default=ContentType.TEXT_PLAIN) + c_enc = cast("AnyContentEncoding", headers.get("Content-Encoding") or "UTF-8") # type: AnyContentEncoding + out_data = data2str(out_data) + out_data = ContentEncoding.encode(out_data, c_enc) + return HTTPOk(body=out_data, content_type=ctype, charset=c_enc, headers=headers) def get_job_results_document(job, results, *, settings): From 4e617c421682bc6612185bbae20e89d0966bb0cd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 5 Oct 2024 03:45:57 -0400 Subject: [PATCH 42/75] more job response handling/tests working --- tests/functional/test_wps_package.py | 27 ++++++++++++++++++++++++--- weaver/wps_restapi/jobs/utils.py | 2 ++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index bddc035d5..98701df99 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3790,6 +3790,18 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_json( def test_execute_single_output_prefer_header_return_minimal_complex_accept_default(self): """ For single requested output, without ``Accept`` content negotiation, its default format is returned by link. + + .. note:: + Because :term:`JSON` ``Accept`` header is **NOT** explicitly requested along the ``Prefer`` header, + the response is returned by ``Link`` header. This is different from requesting ``Accept`` :term:`JSON`, + which "forces" ``minimal`` to be mapped to ``document`` response. 
This is because, for a single output + combined with ``minimal`` (i.e.: requesting explicitly not to return the contents of the file), a ``Link`` + becomes required. To force the :term:`JSON` contents of the file to be returned directly, ``representation`` + must be requested instead. + + .. seealso:: + - :func:`test_execute_single_output_prefer_header_return_minimal_complex_accept_json` + - :func:`test_execute_single_output_prefer_header_return_representation_complex` """ proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3798,6 +3810,8 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_defau exec_headers = { "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + # omitting or specifying 'Accept' any must result in the same response (default link), + # but test it is handled explicitly since the header would be "found" when parsing "Accept": ContentType.ANY, "Content-Type": ContentType.APP_JSON, } @@ -3860,8 +3874,13 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( contents of ``output_json`` file are **NOT** directly returned in the response. .. seealso:: + - :func:`test_execute_single_output_prefer_header_return_minimal_complex_accept_default` + which returns the result by ``Link`` header, which refers to a :term:`JSON` file. + - :func:`test_execute_single_output_prefer_header_return_representation_complex` + for case of embedded ``output_json`` file contents in the response using the other ``Prefer`` return. - :func:`test_execute_single_output_response_raw_value_complex` - for case of embedded ``output_json`` file contents in the response. + for case of embedded ``output_json`` file contents in the response, + using the ``response`` parameter at :term:`Job` execution time, as alternative method to ``Prefer``.
""" proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4181,11 +4200,13 @@ def test_execute_single_output_multipart_accept_data(self): boundary = parse_kvp(results.headers["Content-Type"])["boundary"][0] results_body = self.fix_result_multipart_indent(f""" --{boundary} + Content-Disposition: attachment; name="output_json"; filename="result.json" Content-Type: {ContentType.APP_JSON} Content-Location: {out_url}/{job_id}/output_json/result.json Content-ID: + Content-Length: 15 - {{{"data":"test"}}} + {{"data":"test"}} --{boundary}-- """) results_text = self.remove_result_multipart_variable(results.text) @@ -4867,7 +4888,7 @@ def test_execute_multi_output_response_raw_value(self): Content-ID: Content-Length: 15 - {{{"data":"test"}}} + {{"data":"test"}} --{boundary}-- """) results_text = self.remove_result_multipart_variable(results.text) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 29c872d0f..b91c25b6e 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -652,6 +652,8 @@ def get_job_results_response( headers = get_job_results_links(job, results, out_transmissions, headers=headers, settings=settings) return HTTPNoContent(headers=headers) + # FIXME: support ZIP or similar "container" output (https://github.com/crim-ca/weaver/issues/726) + # FIXME: support Metalink - needs by-reference only (https://github.com/crim-ca/weaver/issues/663) # multipart response # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi From 262ebd92a5760caf4e550625cf15489ff1a6a7c7 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 7 Oct 2024 22:16:36 -0400 Subject: [PATCH 43/75] all job return working --- docs/source/processes.rst | 43 +++++++++++++++++----------- tests/functional/test_wps_package.py | 13 +++++---- 
weaver/wps_restapi/jobs/utils.py | 9 +++++- 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 444227a23..20826c7f2 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -848,13 +848,13 @@ Following is a detailed listing of the expected response structure according to | | | | (literal) | - |res-data|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``reference`` | 1 | - |res-accept| | - | [#resPreferReturn]_ | | | (complex) | - |res-ref|_ | + | [#resPreferReturn]_ | | | (complex) | - |res-link|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``representation`` | ``raw`` | ``value`` | 1 | - |res-accept| | | | | | (complex) | - |res-data|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``raw`` | ``reference`` | 1 | - |res-accept| | - | [#resPreferReturn]_ | | | (literal) | - |res-ref|_ | + | [#resPreferReturn]_ | | | (literal) | - |res-link|_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |none| | |none| | |none| | >1 | - :ref:`Results ` | | | | | | content by default [#resCTypeMulti]_ | @@ -876,24 +876,30 @@ Following is a detailed listing of the expected response structure according to | |na| | ``raw`` | ``reference`` | >1 | - :ref:`Multipart ` | | [#resPreferReturn]_ | | (for *all*) | | content with embedded part links if requested | | | | | | by ``Accept`` header [#resCTypeMulti]_ | - | | | | | - otherwise, similar to |res-ref|, but with | + | | | | | - otherwise, similar to |res-link|, but with | | | | | | a ``Link`` header for each requested output | 
+---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |none| | ``document`` | |none| | |any| | - :ref:`Results ` | | | | | | content | | | | | | - |res-auto| [#resValRef]_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ - | ``minimal`` | ``document`` | |none| | |any| | - :ref:`Results ` | - | | | | | content | + | ``minimal`` | |none| | |none| | 1 | - |res-accept| | + | | | | (literal) | - |res-data|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``minimal`` | |none| | |none| | 1 | - |res-accept| | + | | | | (complex) | - |res-link|_ | + +---------------------+--------------+---------------+-----------+-------------------------------------------------+ + | ``minimal`` | ``document`` | |none| | |none| | - :ref:`Results ` | + | | | | or >1 | content | | | | | | - |res-auto| [#resValRef]_ | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ - | ``minimal`` | ``document`` | ``value`` | |any| | - :ref:`Results ` | - | | | | (literal) | content | - | | | | | - using data included inline | + | ``minimal`` | ``document`` | ``value`` | |none| | - :ref:`Results ` | + | | | | or >1 | content | + | | | | (literal) | - using data included inline | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | ``minimal`` | ``document`` | ``reference`` | |any| | - :ref:`Results ` | - | | | | (complex) | content | - | | | | | - using file link reference | + | | | | or >1 | content | + | | | | (complex) | - using file link reference | +---------------------+--------------+---------------+-----------+-------------------------------------------------+ | |na| | ``document`` | ``value`` | |any| | - :ref:`Results ` | | [#resPreferReturn]_ | | | (complex) 
| content | @@ -915,8 +921,8 @@ Following is a detailed listing of the expected response structure according to .. |res-data| replace:: Results for a Single Output with Data .. _res-data: processes.html#job-results-raw-single-data -.. |res-ref| replace:: Results for a Single Output with Link -.. _res-ref: processes.html#job-results-raw-single-ref +.. |res-link| replace:: Results for a Single Output with Link +.. _res-link: processes.html#job-results-raw-single-ref .. important:: Typically, clients will not use ``Prefer`` header and ``response``/``transmissionMode`` body parameters @@ -970,11 +976,16 @@ Following is a detailed listing of the expected response structure according to .. [#outN] Corresponds to the number of ``outputs`` *requested* in the :ref:`proc_exec_body`, and the data type of those outputs if this distinction impacts the results. - + Note that omitting ``outputs`` (i.e.: indicated by |out-mode| with |none| in the table) is equivalent to - requesting *all* outputs offered by the :term:`Process`. To request "*no outputs at all*" - (if it makes sense for :term:`Process` to do so), - the empty mapping ``outputs: {}`` should be submitted explicitly [#resNoContent]_. + requesting *all* outputs offered by the :term:`Process`. If a :term:`Process` happens to generate only a + single output, but ``outputs`` was omitted, the interpretation will also be as if *all* outputs were + requested, typically resulting in a response similar to |any| or ``N>1`` cases. It is important to make this + distinction from *explicitly* requesting a single output, which will return it directly in the response contents + rather than embedded within a "container" body such as the ``minimal``/``document`` response. + + To request "*no outputs at all*" (if it makes sense for the :term:`Process` to do so), + the empty mapping ``outputs: {}`` should be submitted *explicitly* [#resNoContent]_. See table :ref:`table-exec-body` for an example requesting specific outputs. ..
[#resNoContent] diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 98701df99..dcd404f8c 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3829,7 +3829,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_defau path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 204, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -3906,7 +3906,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4514,11 +4514,12 @@ def test_execute_single_output_response_document_alt_format_json_raw_literal(sel }, } + # FIXME: add check of direct request of output (https://github.com/crim-ca/weaver/pull/548) # validate the results can be obtained with the "real" representation - result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) - assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" - assert result_json.content_type == ContentType.APP_JSON - assert result_json.json == {"data": "test"} + # result_json = 
self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) + # assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + # assert result_json.content_type == ContentType.APP_JSON + # assert result_json.json == {"data": "test"} def test_execute_single_output_response_document_default_format_json_special(self): """ diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index b91c25b6e..d8f6408bf 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -593,7 +593,14 @@ def get_job_results_response( is_rep = job_ret == ExecuteReturnPreference.REPRESENTATION is_raw = job_resp == ExecuteResponse.RAW - if not is_raw and not is_accept_multipart: + # if a single output is explicitly requested, the representation must be ignored and return it directly + # (single result does not matter for a process generating only one, it is the N output requested that matters) + is_single_output_minimal = ( + job.outputs is not None and len(job.outputs) == 1 and + not is_rep and ContentType.APP_JSON not in job.accept_type # alternative way to request 'minimal'/'document' + ) + + if not is_raw and not is_accept_multipart and not is_single_output_minimal: try: results_schema = sd.ResultsDocument() results_json = results_schema.deserialize(results) From 55c555aeb2a89eb25bc31aff6d856a7db05eb9f7 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 00:27:49 -0400 Subject: [PATCH 44/75] fix workflow tests + fix job results generation for directory reference + fix job outputs schema on /jobs/jobID/inputs endpoint --- tests/functional/test_workflow.py | 8 +- weaver/processes/execution.py | 4 +- weaver/processes/wps_process_base.py | 5 +- weaver/utils.py | 21 +++- weaver/wps_restapi/colander_extras.py | 2 + weaver/wps_restapi/jobs/utils.py | 132 +++++++++++----------- weaver/wps_restapi/swagger_definitions.py | 3 +- 7 files changed, 96 insertions(+), 79 
deletions(-) diff --git a/tests/functional/test_workflow.py b/tests/functional/test_workflow.py index 9d4d5098a..9d6bc12a4 100644 --- a/tests/functional/test_workflow.py +++ b/tests/functional/test_workflow.py @@ -37,7 +37,7 @@ ) from weaver import WEAVER_ROOT_DIR from weaver.config import WeaverConfiguration -from weaver.execute import ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteResponse, ExecuteTransmissionMode, ExecuteReturnPreference from weaver.formats import ContentType from weaver.processes.constants import ( CWL_REQUIREMENT_MULTIPLE_INPUT, @@ -926,7 +926,7 @@ def workflow_runner( headers=self.headers, json=execute_body) self.assert_test(lambda: resp.json.get("status") in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING], message="Response process execution job status should be one of running values.") - job_location = resp.json.get("location") + job_location = resp.location job_id = resp.json.get("jobID") self.assert_test(lambda: job_id and job_location and job_location.endswith(job_id), message="Response process execution job ID must match to validate results.") @@ -988,7 +988,9 @@ def validate_test_job_execution(self, job_location_url, user_headers=None, user_ break self.assert_test(lambda: False, message=f"Unknown job execution status: '{status}'.") path = f"{job_location_url}/results" - resp = self.request("GET", path, headers=user_headers, cookies=user_cookies, status=HTTPOk.code) + resp_headers = {"Accept": ContentType.APP_JSON, "Prefer": f"return={ExecuteReturnPreference.MINIMAL}"} + resp_headers.update(user_headers or {}) + resp = self.request("GET", path, headers=resp_headers, cookies=user_cookies, status=HTTPOk.code) return resp, details def try_retrieve_logs(self, workflow_job_url, detailed_results): diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 3edd2af22..49f4735cc 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -903,8 +903,8 @@ def 
validate_process_io(process, payload): :param payload: Submitted job execution body. :raises HTTPException: Corresponding error for detected invalid combination of inputs or outputs. """ - payload_inputs = convert_input_values_schema(payload.get("inputs", {}), JobInputsOutputsSchema.OLD) - payload_outputs = convert_output_params_schema(payload.get("outputs", {}), JobInputsOutputsSchema.OLD) + payload_inputs = convert_input_values_schema(payload.get("inputs", {}), JobInputsOutputsSchema.OLD) or [] + payload_outputs = convert_output_params_schema(payload.get("outputs", {}), JobInputsOutputsSchema.OLD) or [] for io_type, io_payload, io_process in [ ("inputs", payload_inputs, process.inputs), diff --git a/weaver/processes/wps_process_base.py b/weaver/processes/wps_process_base.py index baee0877c..d3155946a 100644 --- a/weaver/processes/wps_process_base.py +++ b/weaver/processes/wps_process_base.py @@ -10,7 +10,7 @@ from weaver.base import Constants from weaver.exceptions import PackageExecutionError -from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode, ExecuteReturnPreference from weaver.formats import ContentType, repr_json from weaver.processes.constants import PACKAGE_COMPLEX_TYPES, PACKAGE_DIRECTORY_TYPE, PACKAGE_FILE_TYPE, OpenSearchField from weaver.processes.convert import get_cwl_io_type @@ -535,7 +535,8 @@ def get_results(self, monitor_reference): """ # use '/results' endpoint instead of '/outputs' to ensure support with other result_url = f"{monitor_reference}/results" - response = self.make_request(method="GET", url=result_url, retry=True) + result_headers = {"Prefer": f"return={ExecuteReturnPreference.MINIMAL}"} + response = self.make_request(method="GET", url=result_url, headers=result_headers, retry=True) response.raise_for_status() contents = response.json() diff --git a/weaver/utils.py b/weaver/utils.py index c2575c860..c0e1c3e55 100644 --- 
a/weaver/utils.py +++ b/weaver/utils.py @@ -36,6 +36,7 @@ from botocore.exceptions import ClientError, HTTPClientError from bs4 import BeautifulSoup from celery.app import Celery +from dateutil.parser import parse as parse_dt from mypy_boto3_s3.literals import RegionName from pyramid.config import Configurator from pyramid.exceptions import ConfigurationError @@ -1241,6 +1242,7 @@ def get_href_headers( href = path if not any(href.startswith(proto) for proto in ["file", "http", "https", "s3"]): href = f"file://{os.path.abspath(path)}" + href += "/" if (path.endswith("/") and not href.endswith("/")) else "" f_enc = None f_size = None f_type = None @@ -1249,9 +1251,18 @@ def get_href_headers( # handle directory if path.endswith("/"): download_headers = False - listing = fetch_directory(href, out_dir="", out_method=OutputMethod.META, settings=settings, **option_kwargs) - f_modified = sorted([get_header("Last-Modified", meta, concat=True) for meta in listing])[-1] - f_size = sum(get_header("Size", meta) for meta in listing) + dir_path = path[7:] if path.startswith("file://") else path + listing = fetch_directory( + href, + # files will not be "fetched" under the directory since using 'META' output method, + # but the actual path is needed to get the file os.stats, to obtain their metadata + out_dir=dir_path, + out_method=OutputMethod.META, + settings=settings, + **option_kwargs, + ) + f_modified = parse_dt(sorted([get_header("Last-Modified", meta, concat=True) for meta in listing])[-1]) + f_size = sum(int(get_header("Content-Length", meta, default=0)) for meta in listing) f_type = ContentType.APP_DIR # handle single file @@ -1269,7 +1280,7 @@ def get_href_headers( s3_file = s3_client.head_object(Bucket=s3_bucket, Key=file_key) f_type = content_type or s3_file["ResponseMetadata"]["HTTPHeaders"]["ContentType"] f_size = s3_file["ResponseMetadata"]["HTTPHeaders"]["Size"] - f_modified = s3_file["ResponseMetadata"]["HTTPHeaders"]["LastModified"] + f_modified =
parse_dt(s3_file["ResponseMetadata"]["HTTPHeaders"]["LastModified"]) except (ClientError, HTTPClientError): if not missing_ok: raise @@ -1279,7 +1290,7 @@ def get_href_headers( if resp.status_code != 200 and not missing_ok: raise ValueError(f"Could not obtain file reference metadata from [{href}]") if resp.status_code == 200: - f_modified = resp.last_modified + f_modified = parse_dt(resp.last_modified) f_type = content_type or resp.content_type f_size = resp.content_length f_enc = resp.content_encoding diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index 83fec8cdd..4803d3edb 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -2239,6 +2239,8 @@ def _deserialize_keyword(self, cstruct): # not a single valid sub-node was found if self.missing is colander.drop: return colander.drop + if self.missing is None and cstruct in [None, colander.null]: + return None # add the invalid sub-errors to the parent oneOf for reporting each error case individually invalid = colander.Invalid(node=self, msg=message, value=cstruct) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index d8f6408bf..6d96d9d13 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -640,72 +640,71 @@ def get_job_results_response( return HTTPNoContent(headers=headers) # raw response can be data-only value, link-only or a mix of them - if results: - # if raw representation is requested and all requested outputs resolve as links - # without explicit 'accept: multipart', then all must use link headers - # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref - res_refs = { - out_id: bool(get_any_value(out, key=True, file=True, data=False)) - for out_id, out in results.items() - } - out_transmissions = { - out_id: get_job_output_transmission(job, out_id, is_ref) - for out_id, is_ref in res_refs.items() - } - if is_raw and not 
is_rep and all( - out_mode == ExecuteTransmissionMode.REFERENCE - for out_mode, _ in out_transmissions.values() - ): - headers = get_job_results_links(job, results, out_transmissions, headers=headers, settings=settings) - return HTTPNoContent(headers=headers) - - # FIXME: support ZIP or similar "container" output (https://github.com/crim-ca/weaver/issues/726) - # FIXME: support Metalink - needs by-reference only (https://github.com/crim-ca/weaver/issues/663) - # multipart response - # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi - # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi - # - https://docs.ogc.org/DRAFTS/18-062.html#per_core_job-results-async-many-other-formats - # extract data to see if it happens to be an array (i.e.: 1 output "technically", but needs multipart) - out_vals = list(results.items()) # type: List[Tuple[str, ExecutionResultValue]] # noqa - out_info = out_vals[0][-1] # type: ExecutionResultValue - out_data = get_any_value(out_info) - if ( - (len(results) + len(refs)) > 1 or - (isinstance(out_data, list) and len(out_data) > 1) or # single output is an array, needs multipart - is_accept_multipart - ): - # FIXME: remove links backtrack not needed anymore - pass results directly - # backtrack link references that were generated if 'Accept: multipart/*' was omitted - # while using 'response=raw' leading to at least 1 by-value output - # (must force multipart with empty-part for links to respect OGC API - Processes v1.0) - # for ref in refs: - # ref_link = parse_link_header(ref[-1]) - # results[ref_link["rel"]] = ref_link - # # attempt sort by original results ordering to generate multipart contents consistently - # out_order = list(convert_output_params_schema(job.results, JobInputsOutputsSchema.OGC)) - # res_order = {out_id: results[out_id] for out_id in out_order if out_id in results} - # res_array = sorted(set(results) - set(res_order)) # in case of 
'out.idx' employed for arrays - # res_order.update({out_id: results[out_id] for out_id in res_array}) # if missing link arrays - #return get_job_results_multipart(job, res_order, headers=headers, settings=settings) - return get_job_results_multipart(job, results, headers=headers, settings=settings) - - # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one - res_id = out_vals[0][0] - # FIXME: add transform for requested output format (https://github.com/crim-ca/weaver/pull/548) - # req_fmt = guess_target_format(container) where container=request - # out_fmt (see above) - # out_type = result.get("type") - # out_select = req_fmt or out_fmt or out_type (resolution order/precedence) - out_fmt = None - return get_job_results_single(job, out_info, res_id, out_fmt, headers=headers, settings=settings) + # if raw representation is requested and all requested outputs resolve as links + # without explicit 'accept: multipart', then all must use link headers + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref + res_refs = { + out_id: bool(get_any_value(out, key=True, file=True, data=False)) + for out_id, out in results.items() + } + out_transmissions = { + out_id: get_job_output_transmission(job, out_id, is_ref) + for out_id, is_ref in res_refs.items() + } + if is_raw and not is_rep and all( + out_mode == ExecuteTransmissionMode.REFERENCE + for out_mode, _ in out_transmissions.values() + ): + headers = get_job_results_links(job, results, out_transmissions, headers=headers, settings=settings) + return HTTPNoContent(headers=headers) - # FIXME: this else is impossible, remove 'if results' above and dedent - else: - resp = HTTPOk(headers=headers) - if refs: + # FIXME: support ZIP or similar "container" output (https://github.com/crim-ca/weaver/issues/726) + # FIXME: support Metalink - needs by-reference only (https://github.com/crim-ca/weaver/issues/663) + # multipart response + # - 
https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi + # - https://docs.ogc.org/DRAFTS/18-062.html#per_core_job-results-async-many-other-formats + # extract data to see if it happens to be an array (i.e.: 1 output "technically", but needs multipart) + out_vals = list(results.items()) # type: List[Tuple[str, ExecutionResultValue]] # noqa + out_info = out_vals[0][-1] # type: ExecutionResultValue + out_data = get_any_value(out_info) + if ( + (len(results) + len(refs)) > 1 or + (isinstance(out_data, list) and len(out_data) > 1) or # single output is an array, needs multipart + is_accept_multipart + ): + # FIXME: remove links backtrack not needed anymore - pass results directly + # backtrack link references that were generated if 'Accept: multipart/*' was omitted + # while using 'response=raw' leading to at least 1 by-value output + # (must force multipart with empty-part for links to respect OGC API - Processes v1.0) + # for ref in refs: + # ref_link = parse_link_header(ref[-1]) + # results[ref_link["rel"]] = ref_link + # # attempt sort by original results ordering to generate multipart contents consistently + # out_order = list(convert_output_params_schema(job.results, JobInputsOutputsSchema.OGC)) + # res_order = {out_id: results[out_id] for out_id in out_order if out_id in results} + # res_array = sorted(set(results) - set(res_order)) # in case of 'out.idx' employed for arrays + # res_order.update({out_id: results[out_id] for out_id in res_array}) # if missing link arrays + #return get_job_results_multipart(job, res_order, headers=headers, settings=settings) + return get_job_results_multipart(job, results, headers=headers, settings=settings) + + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one + res_id = out_vals[0][0] + # FIXME: add transform for requested output format 
(https://github.com/crim-ca/weaver/pull/548) + # req_fmt = guess_target_format(container) where container=request + # out_fmt (see above) + # out_type = result.get("type") + # out_select = req_fmt or out_fmt or out_type (resolution order/precedence) + out_fmt = None + return get_job_results_single(job, out_info, res_id, out_fmt, headers=headers, settings=settings) - resp.headerlist.extend(refs) - return resp + # FIXME: this else is impossible, remove 'if results' above and dedent + # else: + # resp = HTTPOk(headers=headers) + # if refs: + # + # resp.headerlist.extend(refs) + # return resp def generate_or_resolve_result( @@ -778,8 +777,9 @@ def generate_or_resolve_result( with open(loc, mode="w", encoding="utf-8") as out_file: out_file.write(data2str(val)) - if is_ref and output_mode == ExecuteTransmissionMode.VALUE: - res_data = io.FileIO(loc, mode="rb") + if is_ref and output_mode == ExecuteTransmissionMode.VALUE and typ != ContentType.APP_DIR: + res_path = loc[7:] if loc.startswith("file://") else loc + res_data = io.FileIO(res_path, mode="rb") res_headers = get_href_headers( loc, diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index aae08425f..96697e165 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -4149,7 +4149,8 @@ class ExecuteInputOutputs(ExtendedMappingSchema): ), # NOTE: # Explicitly submitted {} or [] means that *no outputs* are requested. - # This must be distinguished from 'all outputs' requested, which is done by omiting 'outputs' field entirely. + # This must be distinguished from 'all outputs' requested, which is done by omitting 'outputs' field entirely. 
+ missing=None, default=None, ) From 4a65359f2e099134f1f2288d60ffdcb7b2575907 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 00:51:34 -0400 Subject: [PATCH 45/75] fix parsing of prefer header with comma/semicolon variants --- tests/test_execute.py | 4 ++-- weaver/execute.py | 8 +++++--- weaver/utils.py | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/test_execute.py b/tests/test_execute.py index 4cace41a0..d942fed31 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -63,10 +63,10 @@ def test_prefer_header_execute_mode(headers, support, expected, extra_prefer): ["headers", "expected"], [ # 1st variant is considered as 1 Prefer header with all values supplied simultaneously + ({"Prefer": "respond-async; wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), # 2nd variant is considered as 2 Prefer headers, each with their respective value - # (this is because urllib, under the hood, concatenates the list of header-values using ';' separator) + # (this is because urllib, under the hood, concatenates the list of header-values using ',' separator) ({"Prefer": "respond-async, wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), - ({"Prefer": "respond-async; wait=4"}, (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), ] ) def test_parse_prefer_header_execute_mode_flexible(headers, expected): diff --git a/weaver/execute.py b/weaver/execute.py index a3649ca63..9d3b7b60a 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -4,7 +4,7 @@ from pyramid.httpexceptions import HTTPBadRequest from weaver.base import Constants -from weaver.utils import get_header, parse_kvp +from weaver.utils import get_header, parse_kvp, transform_json if TYPE_CHECKING: from typing import List, Optional, Union, Tuple @@ -160,7 +160,9 @@ def parse_prefer_header_execute_mode( # /req/core/process-execute-default-execution-mode (C) return
ExecuteMode.SYNC, wait_max, {} - params = parse_kvp(prefer, pair_sep=",", multi_value_sep=None) + params1 = parse_kvp(prefer, pair_sep=",", multi_value_sep=None) + params2 = parse_kvp(prefer, pair_sep=";", multi_value_sep=None) + params = transform_json(params1, extend=params2) wait = wait_max if "wait" in params: try: @@ -169,7 +171,7 @@ def parse_prefer_header_execute_mode( # since 'wait' is the only referenced that users integers, it is guaranteed to be a misuse raise ValueError("Invalid 'wait' with comma-separated values.") if not len(params["wait"]) == 1: - raise ValueError("Too many values.") + raise ValueError("Too many 'wait' values.") wait = params["wait"][0] if not str.isnumeric(wait) or "." in wait or wait.startswith("-"): raise ValueError("Invalid integer for 'wait' in seconds.") diff --git a/weaver/utils.py b/weaver/utils.py index c0e1c3e55..cdf2e4671 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -3891,6 +3891,7 @@ def transform_json(json_data, # type: Dict[str, JSON] rename=None, # type: Optional[Dict[AnyKey, Any]] remove=None, # type: Optional[List[AnyKey]] add=None, # type: Optional[Dict[AnyKey, Any]] + extend=None, # type: Optional[Dict[AnyKey, Any]] replace_values=None, # type: Optional[Dict[AnyKey, Any]] replace_func=None, # type: Optional[Dict[AnyKey, Callable[[Any], Any]]] ): # type: (...) -> Dict[str, JSON] @@ -3913,6 +3914,7 @@ def transform_json(json_data, # type: Dict[str, JSON] :param rename: rename matched fields key name to the associated value name. :param remove: remove matched fields by name. :param add: add or override the fields names with associated values. + :param extend: add or extend the fields names with associated values. :param replace_values: replace matched values by the associated new values regardless of field names. :param replace_func: Replace values under matched fields by name with the returned value from the associated function. 
@@ -3923,6 +3925,7 @@ def transform_json(json_data, # type: Dict[str, JSON] rename = rename or {} remove = remove or [] add = add or {} + extend = extend or {} replace_values = replace_values or {} replace_func = replace_func or {} @@ -3939,6 +3942,17 @@ def transform_json(json_data, # type: Dict[str, JSON] for k, v in add.items(): json_data[k] = v + # extend + for k, v in extend.items(): + v = v if isinstance(v, list) else [v] + if k in json_data: + if isinstance(json_data[k], list): + json_data[k].extend(v) + else: + json_data[k] = [json_data[k]] + v + else: + json_data[k] = v + # replace values for key, value in json_data.items(): for old_value, new_value in replace_values.items(): From 9eac571beb7d6f0c31351d283f126ca28d4d9c90 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 01:10:34 -0400 Subject: [PATCH 46/75] handle backward compatible output transmissionMode lookup --- weaver/processes/convert.py | 1 + 1 file changed, 1 insertion(+) diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index e5ad521d6..6d18ab928 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -1957,6 +1957,7 @@ def convert_output_params_schema(outputs, schema): if schema == JobInputsOutputsSchema.OGC: out_dict = {} for out in outputs: + out = dict(out) # type: ignore # avoid modifying reference out_id = get_any_id(out, pop=True) out_dict[out_id] = out return out_dict From 545de42dcb46455433d563f5a522d62e4a8a8767 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 02:38:02 -0400 Subject: [PATCH 47/75] =?UTF-8?q?[wip]=C2=A0update=20tests=20with=20new=20?= =?UTF-8?q?job=20return=20behaviors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/functional/test_builtin.py | 116 ++++++++++++++++++--------- tests/functional/test_wps_package.py | 2 +- weaver/execute.py | 1 + 3 files changed, 82 insertions(+), 37 deletions(-) diff --git 
a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index bbef70fba..d23b94272 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -27,6 +27,8 @@ from weaver.wps_restapi import swagger_definitions as sd if TYPE_CHECKING: + from typing import Any, Tuple + from weaver.typedefs import ExecutionInputs, ExecutionOutputs, ExecutionResults, JSON, ProcessExecution @@ -112,6 +114,7 @@ def test_jsonarray2netcdf_describe_ogc_schema(self): assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE, ExecuteTransmissionMode.VALUE] def setup_jsonarray2netcdf_inputs(self, stack, use_temp_file=False): + # type: (contextlib.ExitStack[Any], bool) -> Tuple[JSON, str] if use_temp_file: dir_path = tempfile.gettempdir() url_path = f"file://{dir_path}" @@ -449,7 +452,7 @@ def test_echo_process_describe(self): ] def setup_echo_process_execution_body(self, stack): - # type: (contextlib.ExitStack) -> ProcessExecution + # type: (contextlib.ExitStack[Any]) -> ProcessExecution tmp_dir = stack.enter_context(tempfile.TemporaryDirectory()) # pylint: disable=R1732 tmp_feature_collection_geojson = stack.enter_context( tempfile.NamedTemporaryFile(suffix=".geojson", mode="w", dir=tmp_dir) # pylint: disable=R1732 @@ -547,6 +550,12 @@ def setup_echo_process_execution_body(self, stack): } } } + # ensure outputs are not filtered, request all explicitly, + # but auto-resolve transmissionMode/format for missing ones + missing_outputs = {out.replace("Input", "Output") for out in inputs} + for out in missing_outputs: + if out not in outputs: + outputs[out] = {} body = { "inputs": inputs, "outputs": outputs, @@ -597,8 +606,8 @@ def test_echo_process_execute_inputs_valid_schema(self): io_val.update(io_defaults[io_key]) assert payload == body - def validate_echo_process_results(self, results, inputs): - # type: (ExecutionResults, ExecutionInputs) -> None + def validate_echo_process_results(self, results, inputs, outputs): + # type: 
(ExecutionResults, ExecutionInputs, ExecutionOutputs) -> None """ Validate that the outputs from the example ``EchoProcess``. @@ -626,39 +635,44 @@ def validate_echo_process_results(self, results, inputs): "arrayOutput", ]: in_id = out_id.replace("Output", "Input") - out_val = results[out_id].get("value", results[out_id]) - assert out_val == inputs[in_id] + if isinstance(results[out_id], dict) and "value" in results[out_id]: + res_val = results[out_id].get("value", results[out_id]) + else: + res_val = results[out_id] + assert res_val == inputs[in_id] # special literal/bbox object handling - for out_id, out_fields_map in [ + for out_id, res_fields_map in [ ( "measureOutput", [ - (["value", "measurement"], ["value"]), + (["value", "measurement"], []), # ["value"]), # now returned directly for literal ] ), ( "boundingBoxOutput", [ - (["bbox"], ["value", "bbox"]), - (Crs(inputs["boundingBoxInput"]["crs"]).getcodeurn(), ["value", "crs"]), + (["bbox"], ["bbox"]), + (Crs(inputs["boundingBoxInput"]["crs"]).getcodeurn(), ["crs"]), ] ), ]: in_id = out_id.replace("Output", "Input") - for field_map in out_fields_map: + for field_map in res_fields_map: in_val_nested = inputs[in_id] - out_val_nested = results[out_id] + res_val_nested = results[out_id] if isinstance(field_map[0], list): for nested_field in field_map[0]: in_val_nested = in_val_nested[nested_field] else: in_val_nested = field_map[0] for nested_field in field_map[1]: - out_val_nested = out_val_nested[nested_field] - assert out_val_nested == in_val_nested + res_val_nested = res_val_nested[nested_field] + assert res_val_nested == in_val_nested # complex outputs, contents should be the same, but stage-out URL is expected + outputs = copy.deepcopy(outputs) + outputs = {out["id"]: out for out in outputs} if isinstance(outputs, list) else outputs for out_id in [ "complexObjectOutput", "geometryOutput", @@ -667,20 +681,16 @@ def validate_echo_process_results(self, results, inputs): ]: in_id = out_id.replace("Output", 
"Input") in_items = copy.deepcopy(inputs[in_id]) - out_items = copy.deepcopy(results[out_id]) + out_items = copy.deepcopy(outputs[out_id]) + res_items = copy.deepcopy(results[out_id]) in_items = [in_items] if isinstance(in_items, dict) else in_items out_items = [out_items] if isinstance(out_items, dict) else out_items - assert len(in_items) == len(out_items) - for in_def, out_def in zip(in_items, out_items): - assert "href" in out_def + res_items = [res_items] if isinstance(res_items, dict) else res_items + assert len(in_items) == len(res_items) + for in_def, out_def, res_def in zip(in_items, out_items, res_items): # inputs use local paths (mocked by test for "remote" locations) or literal JSON in_path = in_def.pop("href", None) in_path = in_path[7:] if str(in_path).startswith("file://") else in_path - out_url = out_def.pop("href") # compare the rest of the metadata after - out_path = map_wps_output_location(out_url, self.settings, url=False) - # use binary comparison since some contents are binary and others not - with open(out_path, mode="rb") as out_file: - out_data = out_file.read() in_as_data = not in_path if in_as_data: in_data = in_def.pop("value") @@ -689,18 +699,47 @@ def validate_echo_process_results(self, results, inputs): else: with open(in_path, mode="rb") as in_file: in_data = in_file.read() - assert out_data == in_data - # even if the input was provided directly as JSON, - # the output will be provided as reference (return=minimal) - if in_def != {}: - in_type = in_def["mediaType"] if in_as_data else in_def["type"] - assert out_def["type"] == in_type, ( - "Since explicit format was specified, the same is expected as output" - ) + + # validate output result against requested output transmission mode + out_mode = out_def.get("transmissionMode", ExecuteTransmissionMode.REFERENCE) + if out_mode == ExecuteTransmissionMode.REFERENCE: + assert "href" in res_def + assert "value" not in res_def + res_url = res_def.pop("href") # compare the rest of the 
metadata after + res_path = map_wps_output_location(res_url, self.settings, url=False) + # use binary comparison since some contents are binary and others not + with open(res_path, mode="rb") as res_file: + res_data = res_file.read() + assert res_data == in_data + # even if the input was provided directly as JSON, + # the output will be provided as reference (return=minimal) + if in_def != {}: + in_type = in_def["mediaType"] if in_as_data else in_def["type"] + assert res_def["type"] == in_type, ( + "Since explicit format was specified, the same is expected as output" + ) + else: + assert res_def["type"] == ContentType.APP_JSON, ( + "Since no explicit format was specified, at least needs to be JSON" + ) else: - assert out_def["type"] == ContentType.APP_JSON, ( - "Since no explicit format was specified, at least needs to be JSON" - ) + assert "href" not in res_def + assert "value" in res_def + res_data = res_def.pop("value") # compare the rest of the metadata after + res_data = (json.dumps(res_data) if isinstance(res_data, dict) else res_data).encode() + res_data = ContentEncoding.decode(res_data) if in_def.get("encoding") == "base64" else res_data + assert res_data == in_data + # even if the input was provided directly as JSON, + # the output will be provided as reference (return=minimal) + if in_def != {}: + in_type = in_def["mediaType"] if in_as_data else in_def["type"] + assert res_def["mediaType"] == in_type, ( + "Since explicit format was specified, the same is expected as output" + ) + else: + assert res_def["mediaType"] == ContentType.APP_JSON, ( + "Since no explicit format was specified, at least needs to be JSON" + ) def test_echo_process_execute_sync(self): """ @@ -728,14 +767,19 @@ def test_echo_process_execute_sync(self): # since sync, results are directly available instead of job status # even if results are returned directly (instead of status), # status location link is available for reference as needed - assert "Location" in resp.headers + assert 
"Location" not in resp.headers + link_headers = [ref for hdr, ref in resp.headerlist if hdr == "Link"] + link_relations = ["status", "monitor"] + link_job_status = [link for link in link_headers if any(f"rel=\"{rel}\"" in link for rel in link_relations)] + assert len(link_job_status) == len(link_relations) + # validate sync was indeed applied (in normal situation, not considering mock test that runs in sync) assert resp.headers["Preference-Applied"] == headers["Prefer"] # following details should not be available since results are returned in sync instead of async job status for field in ["status", "created", "finished", "duration", "progress"]: assert field not in resp.json results = resp.json - self.validate_echo_process_results(results, body["inputs"]) + self.validate_echo_process_results(results, body["inputs"], body["outputs"]) def test_echo_process_execute_async(self): """ @@ -773,7 +817,7 @@ def test_echo_process_execute_async(self): job_url = resp.json["location"] results = self.monitor_job(job_url) - self.validate_echo_process_results(results, body["inputs"]) + self.validate_echo_process_results(results, body["inputs"], body["outputs"]) def test_jsonarray2netcdf_process(): diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index dcd404f8c..7061fb79a 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -2267,7 +2267,7 @@ def test_execute_job_with_bbox(self): expect_bbox = {"bbox": bbox["bbox"], "crs": "urn:ogc:def:crs:OGC:1.3:CRS84"} assert results assert "bboxOutput" in results - assert results["bboxOutput"]["value"] == expect_bbox, ( + assert results["bboxOutput"] == expect_bbox, ( "Expected the BBOX CRS URI to be interpreted and validated by known WPS definitions." 
) diff --git a/weaver/execute.py b/weaver/execute.py index 9d3b7b60a..1814fb5f5 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -170,6 +170,7 @@ def parse_prefer_header_execute_mode( # 'wait=x,y,z' parsed as 'wait=x' and 'y' / 'z' parameters on their own # since 'wait' is the only referenced that users integers, it is guaranteed to be a misuse raise ValueError("Invalid 'wait' with comma-separated values.") + params["wait"] = list(set(params["wait"])) # allow duplicates silently because of extend/merge strategy if not len(params["wait"]) == 1: raise ValueError("Too many 'wait' values.") wait = params["wait"][0] From b0b61c9445fe7d6562407a447e9def31719a8a0e Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 13:01:49 -0400 Subject: [PATCH 48/75] fix resolution of S3 references from job response returns --- tests/functional/test_wps_package.py | 4 +-- tests/functional/test_wps_provider.py | 4 +-- tests/functional/utils.py | 2 +- tests/wps_restapi/test_jobs.py | 26 ++++++++++------- weaver/utils.py | 40 ++++++++++++++------------- weaver/wps_restapi/jobs/utils.py | 28 +++++++++++++------ 6 files changed, 62 insertions(+), 42 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 7061fb79a..44345977e 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -5520,8 +5520,8 @@ def test_execute_application_package_process_with_bucket_results(self): {"id": "input_with_s3", "href": test_bucket_ref}, ], "outputs": [ - {"id": "output_from_http", "transmissionMode": ExecuteTransmissionMode.VALUE}, - {"id": "output_from_s3", "transmissionMode": ExecuteTransmissionMode.VALUE}, + {"id": "output_from_http", "transmissionMode": ExecuteTransmissionMode.REFERENCE}, + {"id": "output_from_s3", "transmissionMode": ExecuteTransmissionMode.REFERENCE}, ] } with contextlib.ExitStack() as stack_exec: diff --git a/tests/functional/test_wps_provider.py 
b/tests/functional/test_wps_provider.py index 235669964..e7a2566d4 100644 --- a/tests/functional/test_wps_provider.py +++ b/tests/functional/test_wps_provider.py @@ -181,7 +181,7 @@ def test_register_describe_execute_ncdump(self, mock_responses): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "dataset", "href": exec_file}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] } status_url = f"{resources.TEST_REMOTE_SERVER_URL}/status.xml" output_url = f"{resources.TEST_REMOTE_SERVER_URL}/output.txt" @@ -224,7 +224,7 @@ def test_register_describe_execute_ncdump(self, mock_responses): wps_url = self.settings["weaver.wps_output_url"] output_url = f"{wps_url}/{job_id}/output/output.txt" output_path = f"{wps_dir}/{job_id}/output/output.txt" - assert results["output"]["format"]["mediaType"] == ContentType.TEXT_PLAIN + assert results["output"]["type"] == ContentType.TEXT_PLAIN assert results["output"]["href"] == output_url with open(output_path, mode="r", encoding="utf-8") as out_file: data = out_file.read() diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 21712a836..9e19d1ce2 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -28,7 +28,7 @@ from weaver.datatype import Job from weaver.formats import ContentType from weaver.processes.builtin import get_builtin_reference_mapping -from weaver.processes.constants import ProcessSchema +from weaver.processes.constants import JobInputsOutputsSchema, ProcessSchema from weaver.processes.wps_package import get_application_requirement from weaver.status import Status from weaver.utils import fully_qualified_name, get_weaver_url, load_file diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index ea89d50d6..bcfc0b92e 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1531,6 
+1531,12 @@ def test_job_results_errors(self): def test_jobs_inputs_outputs_validations(self): """ Ensure that inputs/outputs submitted or returned can be represented and validated across various formats. + + .. versionchanged:: 6.0 + The ``response`` parameter does not use ``document`` by default anymore. + Internally, the ``document`` remains the default strategy if none was specified, + but allow detecting explicitly when this parameter is omitted, since alternative + using the ``Prefer: return`` header can be employed as well. """ default_trans_mode = {"transmissionMode": ExecuteTransmissionMode.VALUE} @@ -1541,7 +1547,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {}, "outputs": None, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_in_none = sd.Execute().deserialize({"outputs": {"random": default_trans_mode}}) @@ -1551,7 +1557,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {}, "outputs": {"random": default_trans_mode}, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_in_empty_dict = sd.Execute().deserialize({"inputs": {}, "outputs": {"random": default_trans_mode}}) @@ -1561,7 +1567,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {}, "outputs": {"random": default_trans_mode}, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_in_empty_list = sd.Execute().deserialize({"inputs": [], "outputs": {"random": default_trans_mode}}) @@ -1571,7 +1577,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": [], "outputs": {"random": default_trans_mode}, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_out_none = sd.Execute().deserialize({"inputs": {"random": "ok"}}) @@ -1581,7 +1587,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {"random": "ok"}, "outputs": 
None, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_out_empty_dict = sd.Execute().deserialize({"inputs": {"random": "ok"}, "outputs": {}}) @@ -1591,7 +1597,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {"random": "ok"}, "outputs": {}, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_out_empty_list = sd.Execute().deserialize({"inputs": {"random": "ok"}, "outputs": []}) @@ -1601,7 +1607,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {"random": "ok"}, "outputs": [], "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } job_out_defined = sd.Execute().deserialize({ @@ -1614,7 +1620,7 @@ def test_jobs_inputs_outputs_validations(self): "inputs": {"random": "ok"}, "outputs": {"random": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}}, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT + # "response": ExecuteResponse.DOCUMENT } with self.assertRaises(colander.Invalid): @@ -1768,7 +1774,7 @@ def test_job_statistics_response(self): {"value": 3, "mediaType": ContentType.APP_YAML} ]}, {"test": [ - {"value": "1", "mediaType": ContentType.APP_JSON}, + {"value": 1, "mediaType": ContentType.APP_JSON}, # special JSON case, it is loaded inline in document {"value": "2", "mediaType": "text/special"}, {"value": "3", "mediaType": ContentType.APP_YAML} ]}, @@ -1777,5 +1783,5 @@ def test_job_statistics_response(self): ) def test_get_job_results_document(results, expected): job = Job(task_id="test", outputs={}) - output = get_job_results_document(job, results, container={}) + output = get_job_results_document(job, results, settings={}) assert output == expected diff --git a/weaver/utils.py b/weaver/utils.py index cdf2e4671..49e890b19 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -95,7 +95,7 @@ TypeVar, Union ) - from typing_extensions import 
NotRequired, TypeAlias, TypedDict, TypeGuard, Unpack + from typing_extensions import NotRequired, Required, TypeAlias, TypedDict, TypeGuard, Unpack from mypy_boto3_s3.client import S3Client @@ -119,7 +119,6 @@ HeadersType, JSON, KVP, - KVP_Item, Link, Literal, Number, @@ -160,9 +159,10 @@ "Date": str, "Last-Modified": str, "Content-ID": NotRequired[str], - "Content-Type": NotRequired[str], + "Content-Type": Required[str], "Content-Length": NotRequired[str], - "Content-Location": NotRequired[str], + "Content-Encoding": NotRequired[str], + "Content-Location": str, "Content-Disposition": NotRequired[str], }, total=False) _OutputMethod = "OutputMethod" # type: TypeAlias # pylint: disable=C0103,invalid-name @@ -1261,8 +1261,11 @@ def get_href_headers( settings=settings, **option_kwargs, ) - f_modified = parse_dt(sorted([get_header("Last-Modified", meta, concat=True) for meta in listing])[-1]) - f_size = sum(int(get_header("Content-Length", meta, default=0)) for meta in listing) + if listing: + f_modified = parse_dt(sorted([get_header("Last-Modified", meta, concat=True) for meta in listing])[-1]) + f_size = sum(int(get_header("Content-Length", meta, default=0)) for meta in listing) + else: # either empty directory, filtered contents, or failed to retrieve listing + f_size = "0" f_type = ContentType.APP_DIR # handle single file @@ -1273,14 +1276,17 @@ def get_href_headers( if path.startswith("s3://") or path.startswith("https://s3."): try: + s3_region = None + if path.startswith("https://s3."): + path, s3_region = resolve_s3_from_http(path) s3_params = resolve_s3_http_options(**options["http"], **kwargs) - s3_region = options["s3"].pop("region_name", None) + s3_region = s3_region or options["s3"].pop("region_name", None) s3_client = boto3.client("s3", region_name=s3_region, **s3_params) # type: S3Client s3_bucket, file_key = path[5:].split("/", 1) s3_file = s3_client.head_object(Bucket=s3_bucket, Key=file_key) - f_type = content_type or 
s3_file["ResponseMetadata"]["HTTPHeaders"]["ContentType"] - f_size = s3_file["ResponseMetadata"]["HTTPHeaders"]["Size"] - f_modified = parse_dt(s3_file["ResponseMetadata"]["HTTPHeaders"]["LastModified"]) + f_type = content_type or s3_file["ContentType"] + f_size = s3_file["ContentLength"] + f_modified = s3_file["LastModified"] except (ClientError, HTTPClientError): if not missing_ok: raise @@ -2630,7 +2636,7 @@ def fetch_file(file_reference, # type: str settings=None, # type: Optional[AnySettingsContainer] callback=None, # type: Optional[Callable[[str], None]] **option_kwargs, # type: Unpack[Union[SchemeOptions, RequestOptions]] - ): # type: (...) -> str + ): # type: (...) -> Path """ Fetches a file from local path, AWS-S3 bucket or remote URL, and dumps its content to the output directory. @@ -2717,7 +2723,7 @@ def fetch_file(file_reference, # type: str def adjust_file_local(file_reference, file_outdir, out_method): - # type: (str, str, OutputMethod) -> AnyOutputResult + # type: (str, str, OutputMethod) -> Path """ Adjusts the input file reference to the output location with the requested handling method. @@ -2955,21 +2961,17 @@ def fetch_files_s3(location, # type: str s3_files = filter_directory_patterns(s3_files, include, exclude, matcher, key=lambda _file: _file["Key"]) if out_method == OutputMethod.META: - # FIXME: extra metadata needed? 
- # Key/Size/LastModified available from listing directly - # ContentType needs head object additional request per item - # s3_meta = (s3_client.head_object(Bucket=s3_bucket, Key=file_key) for file_key in s3_files) - # s3_meta = (file_meta["ResponseMetadata"]["HTTPHeaders"] for file_meta in s3_meta) + s3_files = list(s3_files) # ensure generator is not pre-exhausted by following loop for file_meta in s3_files: # type: MetadataResult file_key = file_meta.pop("Key") file_meta["Content-Location"] = f"{base_url}{file_key}" - return list(s3_files) + return s3_files s3_files = [file["Key"] for file in s3_files] # create directories in advance to avoid potential errors in case many workers try to generate the same one base_url = base_url.rstrip("/") - sub_dirs = {os.path.split(path)[0] for path in s3_files if "://" not in path or path.startswith(base_url)} + sub_dirs = {os.path.split(str(path))[0] for path in s3_files if "://" not in path or path.startswith(base_url)} sub_dirs = [os.path.join(out_dir, path.replace(base_url, "").lstrip("/")) for path in sub_dirs] for _dir in reversed(sorted(sub_dirs)): os.makedirs(_dir, exist_ok=True) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 6d96d9d13..4f3713d81 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -42,6 +42,7 @@ from weaver.store.base import StoreJobs, StoreProcesses, StoreServices from weaver.utils import ( data2str, + fetch_file, get_any_id, get_any_value, get_header, @@ -744,18 +745,19 @@ def generate_or_resolve_result( # work with local files (since we have them), to avoid unnecessary loopback request # then, rewrite the locations after generating their headers to obtain the final result URL - # FIXME: Handle S3 output storage. Should multipart response even be allowed in this case? 
- if is_ref: url = val typ = result.get("type") # expected for typical link, but also check media-type variants in case pre-converted typ = typ or get_field(result, "mime_type", search_variations=True, default=ContentType.APP_OCTET_STREAM) job_out_url = job.result_path(output_id=output_id) + wps_out_url = get_wps_output_url(settings) if url.startswith(f"/{job_out_url}/"): # job "relative" path - out_url = get_wps_output_url(settings) - url = os.path.join(out_url, url[1:]) - loc = map_wps_output_location(url, settings, exists=True, url=False) - loc = get_secure_path(loc) + url = os.path.join(wps_out_url, url[1:]) + if url.startswith(wps_out_url): + loc = map_wps_output_location(url, settings, exists=True, url=False) + loc = get_secure_path(loc) + else: + loc = url # remote storage, S3, etc. else: typ = get_field(result, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) @@ -766,6 +768,8 @@ def generate_or_resolve_result( loc = os.path.join(out_dir, job_path) loc = get_secure_path(loc) url = map_wps_output_location(loc, settings, exists=False, url=True) + loc = loc[7:] if loc.startswith("file://") else loc + is_local = loc.startswith("/") if is_val and output_mode == ExecuteTransmissionMode.VALUE: res_data = io.StringIO() @@ -778,7 +782,15 @@ def generate_or_resolve_result( out_file.write(data2str(val)) if is_ref and output_mode == ExecuteTransmissionMode.VALUE and typ != ContentType.APP_DIR: - res_path = loc[7:] if loc.startswith("file://") else loc + res_path = loc + if not is_local: + # reference is a remote file, but by-value requested explicitly + # try to retrieve its content locally to return it + wps_out_dir = get_wps_output_dir(settings) + job_out_dir = job.result_path(output_id=output_id) + job_out_dir = os.path.join(wps_out_dir, job_out_dir) + os.makedirs(job_out_dir, exist_ok=True) + res_path = fetch_file(res_path, job_out_dir, settings=settings) res_data = io.FileIO(res_path, mode="rb") res_headers = get_href_headers( @@ -797,7 +809,7 
@@ def generate_or_resolve_result( if output_mode == ExecuteTransmissionMode.REFERENCE: res_data = None res_headers["Content-Length"] = "0" - if not os.path.exists(loc): + if not os.path.exists(loc) and is_local: res_headers.pop("Content-Location", None) return res_headers, res_data From 4ad2c760e6e085b409870f78a50ff726a0b5ff0f Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 13:12:45 -0400 Subject: [PATCH 49/75] fix WPS execute dispatch to job results response --- tests/functional/test_wps_package.py | 6 ++++-- weaver/processes/convert.py | 4 +++- weaver/wps/service.py | 7 ++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 44345977e..ce6f6ca70 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -1922,8 +1922,10 @@ def test_execute_job_with_accept_languages(self): else: # job not even created assert code == 406, f"Error code should indicate not acceptable header for: [{lang}]" - desc = resp.json.get("description") - assert "language" in desc and lang in desc, "Expected error description to indicate bad language" + detail = resp.json.get("detail") + assert "language" in detail and lang in detail, ( + "Expected error description to indicate bad language" + ) @mocked_aws_config @mocked_aws_s3 diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 6d18ab928..ae6e675c7 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -3389,11 +3389,13 @@ def wps2json_job_payload(wps_request, wps_process): else: data_output = wps_request.outputs[oid] if as_ref: - data_output["transmissionMode"] = ExecuteTransmissionMode.VALUE + data_output["transmissionMode"] = ExecuteTransmissionMode.REFERENCE else: data_output["transmissionMode"] = ExecuteTransmissionMode.VALUE data_output["id"] = oid data["outputs"].append(data_output) + if not data["outputs"]: + 
data.pop("outputs") # ensure not 'no output' filter return data diff --git a/weaver/wps/service.py b/weaver/wps/service.py index 84359d510..a5ffe306e 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -208,10 +208,15 @@ def _submit_job(self, wps_request): args = get_request_args(req) tags = args.get("tags", "").split(",") + ["xml", f"wps-{wps_request.version}"] data = wps2json_job_payload(wps_request, wps_process) + headers = dict(req.headers) + headers.update({ + "Accept": ContentType.APP_JSON, + "Content-Type": ContentType.APP_JSON, + }) resp = submit_job_handler( data, self.settings, proc.processEndpointWPS1, process=proc, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC, - language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx + language=wps_request.language, tags=tags, headers=headers, context=ctx ) # enforced JSON results with submitted data that includes 'response=document' # use 'json_body' to work with any 'response' implementation From fb93ef56ce34aa74c8de80d6b958ec5181487d6c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 13:54:03 -0400 Subject: [PATCH 50/75] fix OGC-API dispatch in workflow using literals --- tests/functional/test_workflow.py | 16 +++++------ weaver/processes/wps_process_base.py | 40 ++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/tests/functional/test_workflow.py b/tests/functional/test_workflow.py index 9d6bc12a4..6d0d1c758 100644 --- a/tests/functional/test_workflow.py +++ b/tests/functional/test_workflow.py @@ -1525,14 +1525,14 @@ def test_workflow_passthrough_expressions(self): log_full_trace=True, ) assert result == { - "code1": {"value": 123456}, - "code2": {"value": 123456}, - "integer1": {"value": 3}, - "integer2": {"value": 3}, - "message1": {"value": "msg"}, - "message2": {"value": "msg"}, - "number1": {"value": 3.1416}, - "number2": {"value": 3.1416}, + "code1": 123456, + "code2": 123456, 
+ "integer1": 3, + "integer2": 3, + "message1": "msg", + "message2": "msg", + "number1": 3.1416, + "number2": 3.1416, } def test_workflow_multi_input_and_subworkflow(self): diff --git a/weaver/processes/wps_process_base.py b/weaver/processes/wps_process_base.py index d3155946a..06f18c543 100644 --- a/weaver/processes/wps_process_base.py +++ b/weaver/processes/wps_process_base.py @@ -188,7 +188,7 @@ def format_inputs(self, job_inputs): return job_inputs def format_outputs(self, job_outputs): - # type: (JobOutputs) -> Union[JobOutputs, JobCustomOutputs] + # type: (JobOutputs) -> Optional[Union[JobOutputs, JobCustomOutputs]] """ Implementation dependent operations to configure expected outputs for :term:`Job` execution. @@ -199,8 +199,11 @@ def format_outputs(self, job_outputs): return job_outputs @abc.abstractmethod - def dispatch(self, process_inputs, process_outputs): - # type: (Union[JobInputs, JobCustomInputs], Union[JobOutputs, JobCustomOutputs]) -> JobMonitorReference + def dispatch( + self, + process_inputs, # type: Union[JobInputs, JobCustomInputs] + process_outputs, # type: Optional[Union[JobOutputs, JobCustomOutputs]] + ): # type: (...) -> JobMonitorReference """ Implementation dependent operations to dispatch the :term:`Job` execution to the remote :term:`Process`. @@ -443,20 +446,27 @@ def __init__(self, self.process = process def format_outputs(self, job_outputs): - # type: (JobOutputs) -> JobOutputs - for output in job_outputs: - output.update({"transmissionMode": ExecuteTransmissionMode.VALUE}) - return job_outputs + # type: (JobOutputs) -> Optional[JobOutputs] + # note: + # - Because OGC-API will be requested with 'document'/'minimal' response, + # output transmission mode will auto-resolve as data/link according to type. + # - Because 'job_outputs' originate from CWL 'expected_outputs' in this case, + # only the 'type: File' outputs are listed. 
Providing these outputs explicitly + # will cause the filter-output mechanism to omit all literals from results. + # Therefore, omit any output indication entirely. + # - Use 'None' instead of '{}' or '[]' to avoid "no output" request. + return None def dispatch(self, process_inputs, process_outputs): - # type: (JobInputs, JobOutputs) -> str + # type: (JobInputs, Optional[JobOutputs]) -> str LOGGER.debug("Execute process %s request for [%s]", self.process_type, self.process) execute_body = { "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": process_inputs, - "outputs": process_outputs } + if process_outputs is not None: # don't insert to avoid filter-output by explicit empty dict/list + execute_body["outputs"] = process_outputs LOGGER.debug("Execute process %s body for [%s]:\n%s", self.process_type, self.process, repr_json(execute_body)) request_url = self.url + sd.process_jobs_service.path.format(process_id=self.process) response = self.make_request(method="POST", url=request_url, json=execute_body, retry=True) @@ -535,7 +545,10 @@ def get_results(self, monitor_reference): """ # use '/results' endpoint instead of '/outputs' to ensure support with other result_url = f"{monitor_reference}/results" - result_headers = {"Prefer": f"return={ExecuteReturnPreference.MINIMAL}"} + result_headers = { + "Accept": ContentType.APP_JSON, + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", + } response = self.make_request(method="GET", url=result_url, headers=result_headers, retry=True) response.raise_for_status() contents = response.json() @@ -552,9 +565,14 @@ def get_results(self, monitor_reference): contents = maybe_outputs # rebuild the expected (old) list format for calling method - if isinstance(contents, dict) and all(get_any_value(out) is not None for out in contents.values()): + if isinstance(contents, dict) and all( + (get_any_value(out) if isinstance(out, dict) else out) is not None + for out in contents.values() + ): outputs = [] for 
out_id, out_val in contents.items(): + if not isinstance(out_val, dict): + out_val = {"value": out_val} out_val.update({"id": out_id}) outputs.append(out_val) contents = outputs From 166f321c9cb50e89623521aaab44e6afd425c438 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 14:27:50 -0400 Subject: [PATCH 51/75] fix CLI to align with job return behavior --- weaver/cli.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/weaver/cli.py b/weaver/cli.py index 3d1b910c2..66a672659 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -1175,6 +1175,7 @@ def execute(self, request_retries=None, # type: Optional[int] output_format=None, # type: Optional[AnyOutputFormat] output_refs=None, # type: Optional[Iterable[str]] + output_filter=None, # type: Optional[Sequence[str]] output_context=None, # type: Optional[str] ): # type: (...) -> OperationResult """ @@ -1231,6 +1232,8 @@ def execute(self, containing the data. outputs that refer to a file reference will simply contain that URL reference as link. With value transmission mode (default behavior when outputs are not specified in this list), outputs are returned as direct values (literal or href) within the response content body. + :param output_filter: + Indicates a list of outputs to omit from the results. If unspecified (default), all outputs are returned. :param output_context: Specify an output context for which the `Weaver` instance should attempt storing the :term:`Job` results under the nested location of its configured :term:`WPS` outputs. 
Note that the instance is not required @@ -1252,8 +1255,7 @@ def execute(self, "mode": ExecuteMode.ASYNC, "inputs": values, "response": ExecuteResponse.DOCUMENT, - # FIXME: allow filtering 'outputs' (https://github.com/crim-ca/weaver/issues/380) - "outputs": {} + "outputs": {}, } if subscribers: LOGGER.debug("Adding job execution subscribers:\n%s", Lazify(lambda: repr_json(subscribers, indent=2))) @@ -1268,17 +1270,19 @@ def execute(self, outputs = result.body.get("outputs") output_refs = set(output_refs or []) for output_id in outputs: + if output_filter and output_id in output_filter: + continue if output_id in output_refs: # If any 'reference' is requested explicitly, must switch to 'response=raw' # since 'response=document' ignores 'transmissionMode' definitions. data["response"] = ExecuteResponse.RAW # Use 'value' to have all outputs reported in body as 'value/href' rather than 'Link' headers. - out_mode = ExecuteTransmissionMode.REFERENCE + out_mode = {"transmissionMode": ExecuteTransmissionMode.REFERENCE} else: - # make sure to set value to outputs not requested as reference in case another one needs reference - # mode doesn't matter if no output by reference requested since 'response=document' would be used - out_mode = ExecuteTransmissionMode.VALUE - data["outputs"][output_id] = {"transmissionMode": out_mode} + out_mode = {} # auto-resolution + data["outputs"][output_id] = out_mode + if not data["outputs"]: + data.pop("outputs") # avoid no-output request LOGGER.info("Executing [%s] with inputs:\n%s", process_id, OutputFormat.convert(values, OutputFormat.JSON_STR)) desc_url = self._get_process_url(base, process_id, provider_id) @@ -1712,6 +1716,7 @@ def results(self, job_reference, # type: str out_dir=None, # type: Optional[str] download=False, # type: bool + download_links=None, # type: Optional[Sequence[str]] url=None, # type: Optional[str] auth=None, # type: Optional[AuthBase] headers=None, # type: Optional[AnyHeadersContainer] @@ -1727,6 +1732,10 @@ def 
results(self, :param job_reference: Either the full :term:`Job` status URL or only its UUID. :param out_dir: Output directory where to store downloaded files if requested (default: CURDIR/JobID/). :param download: Download any file reference found within results (CAUTION: could transfer lots of data!). + :param download_links: + Output IDs that are expected in ``Link`` headers, and that should be downloaded as well. + This is not performed automatically since there can be a lot of ``Links`` in responses, + and output IDs could have conflicting ``rel`` names with other indicative links. :param url: Instance URL if not already provided during client creation. :param auth: Instance authentication handler if not already created during client creation. @@ -1759,6 +1768,11 @@ def results(self, outputs = res_out.body headers = res_out.headers out_links = res_out.links(["Link"]) + out_links_meta = [(link, parse_link_header(link[-1])) for link in list(out_links.items())] + out_links = [ + link for link, meta in out_links_meta + if not meta["href"].startswith(job_url) and meta["rel"] in (download_links or []) + ] if not res_out.success or not (isinstance(res_out.body, dict) or len(out_links)): # pragma: no cover return OperationResult(False, "Could not retrieve any output results from job.", outputs, headers) if not download: From cf09163c5cb096d468a4a6609f4334d6078eb8a5 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 14:38:39 -0400 Subject: [PATCH 52/75] fix output auto-ref workflow --- .../application-packages/WorkflowChainCopy/execute.json | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/functional/application-packages/WorkflowChainCopy/execute.json b/tests/functional/application-packages/WorkflowChainCopy/execute.json index 963ea522b..bfb98a775 100644 --- a/tests/functional/application-packages/WorkflowChainCopy/execute.json +++ b/tests/functional/application-packages/WorkflowChainCopy/execute.json @@ -6,11 +6,5 @@ "id": 
"files", "href": "https://mocked-file-server.com/test-file.txt" } - ], - "outputs": [ - { - "id": "output", - "transmissionMode": "value" - } ] } From 4c6c2b6af77d3c5ab61846afd46ed0d3ccf8d098 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 14:56:33 -0400 Subject: [PATCH 53/75] fix prefer header parsing --- tests/functional/test_wps_package.py | 2 +- weaver/execute.py | 7 +++---- weaver/wps_restapi/jobs/utils.py | 27 ++------------------------- 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index ce6f6ca70..094bc8813 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -2404,7 +2404,7 @@ def test_execute_job_with_context_output_dir(self): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "message", "value": "test"}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] } headers = dict(self.json_headers) diff --git a/weaver/execute.py b/weaver/execute.py index 1814fb5f5..920878091 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -4,7 +4,7 @@ from pyramid.httpexceptions import HTTPBadRequest from weaver.base import Constants -from weaver.utils import get_header, parse_kvp, transform_json +from weaver.utils import get_header, parse_kvp if TYPE_CHECKING: from typing import List, Optional, Union, Tuple @@ -160,9 +160,8 @@ def parse_prefer_header_execute_mode( # /req/core/process-execute-default-execution-mode (C) return ExecuteMode.SYNC, wait_max, {} - params1 = parse_kvp(prefer, pair_sep=",", multi_value_sep=None) - params2 = parse_kvp(prefer, pair_sep=";", multi_value_sep=None) - params = transform_json(params1, extend=params2) + # allow both listing of multiple 'Prefer' headers and single 'Prefer' header with multi-param ';' separated + params = 
parse_kvp(prefer.replace(";", ","), pair_sep=",", multi_value_sep=None) wait = wait_max if "wait" in params: try: diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 4f3713d81..f616047d5 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -569,7 +569,7 @@ def get_job_results_response( # - test_execute_single_output_multipart_accept_alt_format # FIXME: remove any 'refs' not needed anymore - results, refs = get_results( + results, _ = get_results( job, container, value_key="value", schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details @@ -629,7 +629,6 @@ def get_job_results_response( # use deserialized contents such that only the applicable fields remain # (simplify compares, this is assumed by the following call) results_json = get_job_results_document(job, results_json, settings=settings) - headers.extend(refs) return HTTPOk(json=results_json, headers=headers) if not results: # avoid schema validation error if all by reference @@ -637,7 +636,6 @@ def get_job_results_response( # see: # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref # - https://docs.ogc.org/DRAFTS/18-062.html#req_core_job-results-param-outputs-empty - headers.extend(refs) return HTTPNoContent(headers=headers) # raw response can be data-only value, link-only or a mix of them @@ -670,23 +668,10 @@ def get_job_results_response( out_info = out_vals[0][-1] # type: ExecutionResultValue out_data = get_any_value(out_info) if ( - (len(results) + len(refs)) > 1 or + len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1) or # single output is an array, needs multipart is_accept_multipart ): - # FIXME: remove links backtrack not needed anymore - pass results directly - # backtrack link references that were generated if 'Accept: multipart/*' was omitted - # while using 'response=raw' leading to at least 1 by-value output - # (must force multipart with empty-part 
for links to respect OGC API - Processes v1.0) - # for ref in refs: - # ref_link = parse_link_header(ref[-1]) - # results[ref_link["rel"]] = ref_link - # # attempt sort by original results ordering to generate multipart contents consistently - # out_order = list(convert_output_params_schema(job.results, JobInputsOutputsSchema.OGC)) - # res_order = {out_id: results[out_id] for out_id in out_order if out_id in results} - # res_array = sorted(set(results) - set(res_order)) # in case of 'out.idx' employed for arrays - # res_order.update({out_id: results[out_id] for out_id in res_array}) # if missing link arrays - #return get_job_results_multipart(job, res_order, headers=headers, settings=settings) return get_job_results_multipart(job, results, headers=headers, settings=settings) # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one @@ -699,14 +684,6 @@ def get_job_results_response( out_fmt = None return get_job_results_single(job, out_info, res_id, out_fmt, headers=headers, settings=settings) - # FIXME: this else is impossible, remove 'if results' above and dedent - # else: - # resp = HTTPOk(headers=headers) - # if refs: - # - # resp.headerlist.extend(refs) - # return resp - def generate_or_resolve_result( job, # type: Job From 8dda7cb48813acb96c380facb7dec89e7b926ef0 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 15:30:09 -0400 Subject: [PATCH 54/75] fix CLI with output from Link headers with explicit param --- CHANGES.rst | 4 +++- tests/functional/test_cli.py | 4 +++- tests/functional/test_docker_app.py | 4 ++-- weaver/cli.py | 23 ++++++++++++++++------- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 281639ae9..67682a3bf 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,7 +12,9 @@ Changes Changes: -------- -- No change. +- Add ``output_links``/``-oL`` parameter to Python client and CLI to retrieve ``Link`` headers as `Job` results. 
+ Due to the multiple ``Link`` headers returned by `Job` results, this cannot be performed automatically without + the assumption of which ``rel`` links correspond to actual output IDs to extract. Fixes: ------ diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index 5fb99a7be..6a157bc67 100644 --- a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -1604,6 +1604,7 @@ def test_execute_result_by_reference(self): "-j", job_id, "-wH", # must display header to get 'Link' "-F", OutputFormat.YAML, + "-oL", "output" ], trim=False, entrypoint=weaver_cli, @@ -1624,8 +1625,9 @@ def test_execute_result_by_reference(self): "-j", job_id, "-wH", # must display header to get 'Link' "-F", OutputFormat.YAML, + "-oL", "output", "-D", - "-O", out_tmp + "-O", out_tmp, ], trim=False, entrypoint=weaver_cli, diff --git a/tests/functional/test_docker_app.py b/tests/functional/test_docker_app.py index adbdc72ff..503157344 100644 --- a/tests/functional/test_docker_app.py +++ b/tests/functional/test_docker_app.py @@ -160,7 +160,7 @@ def test_execute_wps_rest_resp_json(self): {"id": "file", "href": tmp_file.name}, ], "outputs": [ - {"id": self.out_key, "transmissionMode": ExecuteTransmissionMode.VALUE}, + {"id": self.out_key, "transmissionMode": ExecuteTransmissionMode.REFERENCE}, ] } for mock_exec in mocked_execute_celery(): @@ -360,7 +360,7 @@ def test_execute_docker_embedded_python_script(self): {"id": "cost", "value": cost} ], "outputs": [ - {"id": "quote", "transmissionMode": ExecuteTransmissionMode.VALUE}, + {"id": "quote", "transmissionMode": ExecuteTransmissionMode.REFERENCE}, ] } resp = mocked_sub_requests(self.app, "POST", path, json=body, headers=self.json_headers, only_local=True) diff --git a/weaver/cli.py b/weaver/cli.py index 66a672659..e63800937 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -24,7 +24,7 @@ from weaver import __meta__ from weaver.datatype import AutoBase from weaver.exceptions import PackageRegistrationError -from 
weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode, ExecuteReturnPreference from weaver.formats import ContentEncoding, ContentType, OutputFormat, get_content_type, get_format, repr_json from weaver.processes.constants import ProcessSchema from weaver.processes.convert import ( @@ -1716,7 +1716,6 @@ def results(self, job_reference, # type: str out_dir=None, # type: Optional[str] download=False, # type: bool - download_links=None, # type: Optional[Sequence[str]] url=None, # type: Optional[str] auth=None, # type: Optional[AuthBase] headers=None, # type: Optional[AnyHeadersContainer] @@ -1725,6 +1724,7 @@ def results(self, request_timeout=None, # type: Optional[int] request_retries=None, # type: Optional[int] output_format=None, # type: Optional[AnyOutputFormat] + output_links=None, # type: Optional[Sequence[str]] ): # type: (...) -> OperationResult """ Obtain the results of a successful :term:`Job` execution. @@ -1732,10 +1732,6 @@ def results(self, :param job_reference: Either the full :term:`Job` status URL or only its UUID. :param out_dir: Output directory where to store downloaded files if requested (default: CURDIR/JobID/). :param download: Download any file reference found within results (CAUTION: could transfer lots of data!). - :param download_links: - Output IDs that are expected in ``Link`` headers, and that should be downloaded as well. - This is not performed automatically since there can be a lot of ``Links`` in responses, - and output IDs could have conflicting ``rel`` names with other indicative links. :param url: Instance URL if not already provided during client creation. :param auth: Instance authentication handler if not already created during client creation. @@ -1748,6 +1744,10 @@ def results(self, :param request_timeout: Maximum timout duration (seconds) to wait for a response when performing HTTP requests. 
:param request_retries: Amount of attempt to retry HTTP requests in case of failure. :param output_format: Select an alternate output representation of the result body contents. + :param output_links: + Output IDs that are expected in ``Link`` headers, and that should be retrieved (or downloaded) as results. + This is not performed automatically since there can be a lot of ``Links`` in responses, and output IDs + could have conflicting ``rel`` names with other indicative links. :returns: Result details and local paths if downloaded. """ job_id, job_url = self._parse_job_ref(job_reference, url) @@ -1759,6 +1759,11 @@ def results(self, # with this endpoint, outputs IDs are directly at the root of the body result_url = f"{job_url}/results" LOGGER.info("Retrieving results from [%s]", result_url) + headers = headers or {} + headers.update({ + "Accept": ContentType.APP_JSON, + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", + }) resp = self._request("GET", result_url, headers=self._headers, x_headers=headers, settings=self._settings, auth=auth, request_timeout=request_timeout, request_retries=request_retries) @@ -1771,7 +1776,7 @@ def results(self, out_links_meta = [(link, parse_link_header(link[-1])) for link in list(out_links.items())] out_links = [ link for link, meta in out_links_meta - if not meta["href"].startswith(job_url) and meta["rel"] in (download_links or []) + if not meta["href"].startswith(job_url) and meta["rel"] in (output_links or []) ] if not res_out.success or not (isinstance(res_out.body, dict) or len(out_links)): # pragma: no cover return OperationResult(False, "Could not retrieve any output results from job.", outputs, headers) @@ -2941,6 +2946,10 @@ def make_parser(): help="Output directory where to store downloaded files from job results if requested " "(default: ``${CURDIR}/{JobID}/``)." 
) + op_results.add_argument( + "-oL", "--output-link", dest="output_links", nargs="+", + help="Output IDs in 'Link' headers to retrieve as results for matching relationship ('rel') links." + ) op_upload = WeaverArgumentParser( "upload", From 192bf48f40886975bdc2cea04b80f57ee8e7dbb8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 16:01:41 -0400 Subject: [PATCH 55/75] fix more tests assuming old behaviors --- tests/functional/test_builtin.py | 45 ++++++++++++++++++-------------- tests/functional/utils.py | 3 ++- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index d23b94272..2e917d9d8 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -21,6 +21,7 @@ from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode from weaver.formats import ContentEncoding, ContentType, get_format, repr_json from weaver.processes.builtin import file_index_selector, jsonarray2netcdf, metalink2netcdf, register_builtin_processes +from weaver.processes.constants import JobInputsOutputsSchema from weaver.status import Status from weaver.utils import create_metalink, fully_qualified_name from weaver.wps.utils import map_wps_output_location @@ -140,12 +141,15 @@ def validate_jsonarray2netcdf_results(self, results, outputs, data, links): assert "output" in results, "Expected result ID 'output' in response body" assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict" assert "href" in results["output"] - assert "format" in results["output"] - fmt = results["output"]["format"] # type: JSON - assert isinstance(fmt, dict), "Result format should be provided with content details" - assert "mediaType" in fmt - assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition" - assert fmt["mediaType"] == ContentType.APP_NETCDF, 
"Result 'output' format expected to be NetCDF file" + assert "format" not in results["output"] # old format not applied in results anymore + # fmt = results["output"]["format"] # type: JSON + # assert isinstance(fmt, dict), "Result format should be provided with content details" + # assert "mediaType" in fmt + # assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition" + # assert fmt["mediaType"] == ContentType.APP_NETCDF, "Result 'output' format expected to be NetCDF file" + assert results["output"]["type"] == ContentType.APP_NETCDF, ( + "Result 'output' format expected to be NetCDF file" + ) nc_href = results["output"]["href"] assert isinstance(nc_href, str) and len(nc_href) elif links: @@ -210,7 +214,7 @@ def test_jsonarray2netcdf_execute_async(self): body.update({ "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}], + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], }) for mock_exec in mocked_execute_celery(): stack_exec.enter_context(mock_exec) @@ -237,14 +241,18 @@ def test_jsonarray2netcdf_execute_async(self): self.validate_jsonarray2netcdf_results(results, outputs, nc_data, None) - def test_jsonarray2netcdf_execute_async_output_by_reference_dontcare_response_document(self): + def test_jsonarray2netcdf_execute_async_output_by_reference_response_document(self): """ - Jobs submitted with ``response=document`` are not impacted by ``transmissionMode``. + Jobs submitted with ``response=document`` with ``transmissionMode`` by reference. The results schema should always be returned when document is requested. .. seealso:: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document + + .. versionchanged:: 6.0 + Removed the "don't care" aspect of the test, since ``transmissionMode`` is now respected. 
+ Therefore, ``transmissionMode=reference`` is explicitly requested. """ with contextlib.ExitStack() as stack_exec: body, nc_data = self.setup_jsonarray2netcdf_inputs(stack_exec) @@ -576,6 +584,8 @@ def test_echo_process_execute_inputs_valid_schema(self): if the inputs failing schema validation happened to be optional, those could not be propagated correctly. .. versionadded:: 4.35 + .. versionadded:: 6.0 + Modified defaults that are not the same anymore to allow alternative request combinations. """ with contextlib.ExitStack() as stack: body = self.setup_echo_process_execution_body(stack) @@ -583,7 +593,8 @@ def test_echo_process_execute_inputs_valid_schema(self): expect_defaults = { "$schema": sd.Execute._schema, "mode": ExecuteMode.AUTO, - "response": ExecuteResponse.DOCUMENT, + # not auto-default anymore, but default in code if omitted, to allow 'Prefer' override + # "response": ExecuteResponse.DOCUMENT, } expect_input_defaults = { "measureInput": {"mediaType": ContentType.APP_JSON}, @@ -591,9 +602,10 @@ def test_echo_process_execute_inputs_valid_schema(self): "complexObjectInput": {"mediaType": ContentType.APP_JSON}, } expect_output_defaults = { - "imagesOutput": {"transmissionMode": ExecuteTransmissionMode.VALUE}, - "geometryOutput": {"transmissionMode": ExecuteTransmissionMode.VALUE}, - "featureCollectionOutput": {"transmissionMode": ExecuteTransmissionMode.VALUE}, + # 'value' is not default anymore, to allow auto-resolution of data/link by result literal/complex type + "imagesOutput": {}, # {"transmissionMode": ExecuteTransmissionMode.VALUE}, + "geometryOutput": {}, # {"transmissionMode": ExecuteTransmissionMode.VALUE}, + "featureCollectionOutput": {}, # {"transmissionMode": ExecuteTransmissionMode.VALUE}, } body.update(expect_defaults) for io_holder, io_defaults in [("inputs", expect_input_defaults), ("outputs", expect_output_defaults)]: @@ -792,13 +804,6 @@ def test_echo_process_execute_async(self): body.update({ "mode": ExecuteMode.ASYNC, "response": 
ExecuteResponse.DOCUMENT, - "outputs": [ - { - "id": input_id.replace("Input", "Output"), - "transmissionMode": ExecuteTransmissionMode.VALUE, - } - for input_id in body["inputs"] - ], }) for mock_exec in mocked_execute_celery(): stack_exec.enter_context(mock_exec) diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 9e19d1ce2..5e4a0583e 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -507,7 +507,8 @@ def check_job_status(_resp, running=False): check_job_status(resp) if return_status or expect_failed: return resp.json - resp = self.app.get(f"{status_url}/results", headers=self.json_headers) + params = {"schema": JobInputsOutputsSchema.OGC} # not strict to preserve old 'format' field + resp = self.app.get(f"{status_url}/results", params=params, headers=self.json_headers) assert resp.status_code == 200, f"Error job info:\n{resp.text}" return resp.json From 8dbeb4fd7b11d26b97b8215b27070f4b505ba95b Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 17:49:00 -0400 Subject: [PATCH 56/75] fix more tests --- tests/functional/test_builtin.py | 65 +++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 2e917d9d8..1b69ce723 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -21,7 +21,6 @@ from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode from weaver.formats import ContentEncoding, ContentType, get_format, repr_json from weaver.processes.builtin import file_index_selector, jsonarray2netcdf, metalink2netcdf, register_builtin_processes -from weaver.processes.constants import JobInputsOutputsSchema from weaver.status import Status from weaver.utils import create_metalink, fully_qualified_name from weaver.wps.utils import map_wps_output_location @@ -134,24 +133,31 @@ def setup_jsonarray2netcdf_inputs(self, 
stack, use_temp_file=False): body = {"inputs": [{"id": "input", "href": f"{url_path}/{os.path.basename(tmp_json.name)}"}]} return body, nc_data - def validate_jsonarray2netcdf_results(self, results, outputs, data, links): + def validate_jsonarray2netcdf_results(self, results, outputs, data, links, exec_body): # first validate format of OGC-API results if results is not None: assert isinstance(results, dict) assert "output" in results, "Expected result ID 'output' in response body" assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict" - assert "href" in results["output"] assert "format" not in results["output"] # old format not applied in results anymore - # fmt = results["output"]["format"] # type: JSON - # assert isinstance(fmt, dict), "Result format should be provided with content details" - # assert "mediaType" in fmt - # assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition" - # assert fmt["mediaType"] == ContentType.APP_NETCDF, "Result 'output' format expected to be NetCDF file" - assert results["output"]["type"] == ContentType.APP_NETCDF, ( - "Result 'output' format expected to be NetCDF file" - ) - nc_href = results["output"]["href"] - assert isinstance(nc_href, str) and len(nc_href) + out_defs = {out["id"]: out for out in exec_body["outputs"]} + nc_href = None + if out_defs.get("output", {}).get("transmissionMode") == ExecuteTransmissionMode.VALUE: + assert "value" in results["output"] + assert "mediaType" in results["output"] + assert results["output"]["value"] == data + assert results["output"]["mediaType"] == ContentType.APP_NETCDF + assert "href" not in results["output"] + assert "type" not in results["output"] + else: + assert "href" in results["output"] + assert results["output"]["type"] == ContentType.APP_NETCDF, ( + "Result 'output' format expected to be NetCDF file" + ) + nc_href = results["output"]["href"] + assert isinstance(nc_href, str) and 
len(nc_href) + assert "value" not in results["output"] + assert "mediaType" not in results["output"] elif links: assert isinstance(links, list) and len(links) == 1 and isinstance(links[0], tuple) assert "rel=\"output\"" in links[0][1] @@ -232,6 +238,8 @@ def test_jsonarray2netcdf_execute_async(self): assert "outputs" not in resp.json job_url = resp.json["location"] + assert "Location" in resp.headers + assert resp.headers["Location"] == job_url results = self.monitor_job(job_url) output_url = f"{job_url}/outputs" @@ -239,7 +247,7 @@ def test_jsonarray2netcdf_execute_async(self): assert resp.status_code == 200, f"Error job outputs:\n{repr_json(resp.text, indent=2)}" outputs = resp.json - self.validate_jsonarray2netcdf_results(results, outputs, nc_data, None) + self.validate_jsonarray2netcdf_results(results, outputs, nc_data, None, body) def test_jsonarray2netcdf_execute_async_output_by_reference_response_document(self): """ @@ -285,7 +293,7 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_document(se assert resp.status_code == 200, f"Error job outputs:\n{resp.text}" outputs = resp.json - self.validate_jsonarray2netcdf_results(results, outputs, nc_data, result_links) + self.validate_jsonarray2netcdf_results(results, outputs, nc_data, result_links, body) def test_jsonarray2netcdf_execute_async_output_by_value_response_raw(self): """ @@ -329,7 +337,7 @@ def test_jsonarray2netcdf_execute_async_output_by_value_response_raw(self): assert resp.status_code == 200, f"Error job outputs:\n{resp.text}" outputs = resp.json - self.validate_jsonarray2netcdf_results(None, outputs, nc_data, result_links) + self.validate_jsonarray2netcdf_results(None, outputs, nc_data, result_links, body) def test_jsonarray2netcdf_execute_async_output_by_reference_response_raw(self): """ @@ -370,7 +378,7 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_raw(self): assert resp.status_code == 200, f"Error job outputs:\n{repr_json(resp.text, indent=2)}" 
outputs = resp.json - self.validate_jsonarray2netcdf_results(None, outputs, nc_data, result_links) + self.validate_jsonarray2netcdf_results(None, outputs, nc_data, result_links, body) def test_jsonarray2netcdf_execute_sync(self): """ @@ -398,16 +406,28 @@ def test_jsonarray2netcdf_execute_sync(self): # since sync, results are directly available instead of job status # even if results are returned directly (instead of status), - # status location link is available for reference as needed - assert "Location" in resp.headers + # status link is available for reference as needed + # however, 'Location' header is not provided since there is no need to redirect + assert "Location" not in resp.headers + link_headers = [ref for hdr, ref in resp.headerlist if hdr == "Link"] + link_relations = ["status", "monitor"] + link_job_status = [link for link in link_headers if any(f"rel=\"{rel}\"" in link for rel in link_relations)] + assert len(link_job_status) == len(link_relations) # validate sync was indeed applied (in normal situation, not considering mock test that runs in sync) assert resp.headers["Preference-Applied"] == headers["Prefer"] # following details should not be available since results are returned in sync instead of async job status for field in ["status", "created", "finished", "duration", "progress"]: assert field not in resp.json + # since sync response is represented as 'document', + # the 'Content-Location' header must indicate the Job Results endpoint + # that allows retrieving the same results at a later time + assert "Content-Location" in resp.headers + assert resp.headers["Content-Location"].endswith("/results") + job_results_url = resp.headers["Content-Location"] + job_url = job_results_url.rsplit("/results", 1)[0] + # validate that job can still be found and its metadata are defined although executed in sync - job_url = resp.headers["Location"] resp = self.app.get(job_url, headers=self.json_headers) assert resp.status_code == 200 assert resp.content_type 
== ContentType.APP_JSON @@ -427,7 +447,7 @@ def test_jsonarray2netcdf_execute_sync(self): assert resp.status_code == 200, f"Error job outputs:\n{repr_json(resp.text, indent=2)}" outputs = resp.json - self.validate_jsonarray2netcdf_results(results, outputs, nc_data, None) + self.validate_jsonarray2netcdf_results(results, outputs, nc_data, None, body) def test_echo_process_describe(self): resp = self.app.get("/processes/EchoProcess", headers=self.json_headers) @@ -778,7 +798,8 @@ def test_echo_process_execute_sync(self): # since sync, results are directly available instead of job status # even if results are returned directly (instead of status), - # status location link is available for reference as needed + # status link is available for reference as needed + # however, 'Location' header is not provided since there is no need to redirect assert "Location" not in resp.headers link_headers = [ref for hdr, ref in resp.headerlist if hdr == "Link"] link_relations = ["status", "monitor"] From 20fc82689377eba91a4c3b1541e93f1295fce5bd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 8 Oct 2024 18:00:01 -0400 Subject: [PATCH 57/75] linting fixes --- tests/functional/test_workflow.py | 2 +- tests/functional/test_wps_package.py | 6 +++--- tests/processes/test_wps_package.py | 1 + tests/wps_restapi/test_jobs.py | 2 +- weaver/cli.py | 2 +- weaver/execute.py | 3 ++- weaver/processes/constants.py | 1 + weaver/processes/convert.py | 2 +- weaver/processes/execution.py | 2 +- weaver/processes/wps_process_base.py | 2 +- weaver/utils.py | 2 +- weaver/wps_restapi/jobs/utils.py | 30 ++++++++++++++-------------- 12 files changed, 29 insertions(+), 26 deletions(-) diff --git a/tests/functional/test_workflow.py b/tests/functional/test_workflow.py index 6d0d1c758..c0ef941d9 100644 --- a/tests/functional/test_workflow.py +++ b/tests/functional/test_workflow.py @@ -37,7 +37,7 @@ ) from weaver import WEAVER_ROOT_DIR from weaver.config import WeaverConfiguration -from 
weaver.execute import ExecuteResponse, ExecuteTransmissionMode, ExecuteReturnPreference +from weaver.execute import ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode from weaver.formats import ContentType from weaver.processes.constants import ( CWL_REQUIREMENT_MULTIPLE_INPUT, diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 094bc8813..d79f04581 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3543,12 +3543,12 @@ def setUpClass(cls) -> None: def setUp(self) -> None: self.process_store.clear_processes() - + @staticmethod def remove_result_format(results): """ Remove the results ``format`` property to simplify test comparions. - + For backward compatibility, the ``format`` property is inserted in result definitions when represented as :term:`JSON`, on top of the :term:`OGC` compliant ``type``, ``mediaType``, etc. of the "format" schema for qualified values and link references. @@ -3593,7 +3593,7 @@ def fix_result_multipart_indent(results): res_indent = len(results) - len(res_dedent) res_spaces = " " * res_indent res_dedent = res_dedent.replace(f"\n{res_spaces}", "\r\n") # indented line - res_dedent = res_dedent.replace(f"\n\r\n", "\r\n\r\n") # empty line (header/body separator) + res_dedent = res_dedent.replace("\n\r\n", "\r\n\r\n") # empty line (header/body separator) res_dedent = res_dedent.replace("\r\r", "\r") # in case windows res_dedent = res_dedent.rstrip("\n ") # last line often indented less because of closing multiline string return res_dedent diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py index cba546e41..0d896636d 100644 --- a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -154,6 +154,7 @@ def __init__(self, shell_command, arguments=None, with_message_input=True): super(MockProcess, self).__init__(body) +@pytest.mark.flaky(reruns=2, reruns_delay=1) def 
test_stdout_stderr_logging_for_commandline_tool_success(caplog): """ Execute a process and assert that stdout is correctly logged to log file upon successful process execution. diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index bcfc0b92e..30fa1f754 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -41,12 +41,12 @@ from weaver.visibility import Visibility from weaver.warning import TimeZoneInfoAlreadySetWarning from weaver.wps_restapi import swagger_definitions as sd +from weaver.wps_restapi.jobs.utils import get_job_results_document from weaver.wps_restapi.swagger_definitions import ( DATETIME_INTERVAL_CLOSED_SYMBOL, DATETIME_INTERVAL_OPEN_END_SYMBOL, DATETIME_INTERVAL_OPEN_START_SYMBOL ) -from weaver.wps_restapi.jobs.utils import get_job_results_document if TYPE_CHECKING: from typing import Iterable, List, Optional, Tuple, Union diff --git a/weaver/cli.py b/weaver/cli.py index e63800937..729946e8e 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -24,7 +24,7 @@ from weaver import __meta__ from weaver.datatype import AutoBase from weaver.exceptions import PackageRegistrationError -from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode, ExecuteReturnPreference +from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode from weaver.formats import ContentEncoding, ContentType, OutputFormat, get_content_type, get_format, repr_json from weaver.processes.constants import ProcessSchema from weaver.processes.convert import ( diff --git a/weaver/execute.py b/weaver/execute.py index 920878091..37ca6f3eb 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -7,7 +7,7 @@ from weaver.utils import get_header, parse_kvp if TYPE_CHECKING: - from typing import List, Optional, Union, Tuple + from typing import List, Optional, Tuple, Union from weaver.typedefs import AnyHeadersContainer, HeadersType, Literal @@ -43,6 +43,7 @@ 
ExecuteTransmissionModeReferenceType, ExecuteTransmissionModeValueType, ] + # pylint: disable=C0103,invalid-name ExecuteCollectionFormatType_STAC = Literal["stac-collection"] ExecuteCollectionFormatType_OGC_COVERAGE = Literal["ogc-coverage-collection"] ExecuteCollectionFormatType_OGC_FEATURES = Literal["ogc-features-collection"] diff --git a/weaver/processes/constants.py b/weaver/processes/constants.py index faf892e46..827ed4c3f 100644 --- a/weaver/processes/constants.py +++ b/weaver/processes/constants.py @@ -353,6 +353,7 @@ class OpenSearchField(Constants): OAS_LITERAL_TYPES ) +# pylint: disable=C0103,invalid-name ProcessSchemaOGCType = Literal["OGC", "ogc"] ProcessSchemaOLDType = Literal["OLD", "old"] ProcessSchemaWPSType = Literal["WPS", "wps"] diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index ae6e675c7..2810635cc 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -126,9 +126,9 @@ from requests.models import Response from weaver.processes.constants import ( - JobInputsOutputsSchemaType, JobInputsOutputsSchemaAnyOGCType, JobInputsOutputsSchemaAnyOLDType, + JobInputsOutputsSchemaType, ProcessSchemaType, WPS_DataType, WPS_LiteralData_Type diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 49f4735cc..e12e5cd93 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -58,7 +58,7 @@ load_pywps_config ) from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.jobs.utils import get_job_results_response, get_job_submission_response, get_job_return +from weaver.wps_restapi.jobs.utils import get_job_results_response, get_job_return, get_job_submission_response from weaver.wps_restapi.processes.utils import resolve_process_tag LOGGER = logging.getLogger(__name__) diff --git a/weaver/processes/wps_process_base.py b/weaver/processes/wps_process_base.py index 06f18c543..909c74bb0 100644 --- a/weaver/processes/wps_process_base.py +++ 
b/weaver/processes/wps_process_base.py @@ -10,7 +10,7 @@ from weaver.base import Constants from weaver.exceptions import PackageExecutionError -from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode, ExecuteReturnPreference +from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteReturnPreference from weaver.formats import ContentType, repr_json from weaver.processes.constants import PACKAGE_COMPLEX_TYPES, PACKAGE_DIRECTORY_TYPE, PACKAGE_FILE_TYPE, OpenSearchField from weaver.processes.convert import get_cwl_io_type diff --git a/weaver/utils.py b/weaver/utils.py index 49e890b19..58ad608a7 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -1200,7 +1200,7 @@ def get_href_headers( missing_ok=False, # type: bool settings=None, # type: Optional[SettingsType] **option_kwargs, # type: Unpack[Union[SchemeOptions, RequestOptions]] - ): # type: (...) -> MetadataResult +): # type: (...) -> MetadataResult """ Obtain headers applicable for the provided file or directory reference. 
diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index f616047d5..4e0835f88 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -1,5 +1,4 @@ import io - import math import os import shutil @@ -33,8 +32,8 @@ ServiceNotAccessible, ServiceNotFound ) -from weaver.execute import ExecuteResponse, ExecuteTransmissionMode, parse_prefer_header_return, ExecuteReturnPreference -from weaver.formats import ContentType, get_format, repr_json, ContentEncoding +from weaver.execute import ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode, parse_prefer_header_return +from weaver.formats import ContentEncoding, ContentType, get_format, repr_json from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field @@ -79,8 +78,8 @@ ExecutionResultValue, HeadersTupleType, HeadersType, - JSON, JobValueFormat, + JSON, PyramidRequest, SettingsType ) @@ -301,7 +300,7 @@ def make_result_link( output_id, # type: str output_mode, # type: AnyExecuteTransmissionMode output_format=None, # type: Optional[JobValueFormat] - *, # type: Any + *, # force named keyword arguments after settings, # type: SettingsType ): # type: (...) 
-> List[str] """ @@ -315,7 +314,7 @@ def make_result_link( links = [] for suffix, value in zip(suffixes, values): result_id = f"{output_id}{suffix}" - headers, _ = generate_or_resolve_result(job, result, result_id, output_id, output_mode, output_format, settings) + headers, _ = generate_or_resolve_result(job, value, result_id, output_id, output_mode, output_format, settings) url = headers["Content-Location"] typ = headers["Content-Type"] enc = headers.get("Content-Encoding", None) @@ -387,7 +386,7 @@ def get_results( # pylint: disable=R1260 else: array = value if isinstance(value, list) else [value] - for val_idx, val_item in enumerate(array): + for val_item in array: val_data = val_item if isinstance(val_item, dict) and isinstance(value, list): rtype = "href" if get_any_value(val_item, key=True, file=True, data=False) else "data" @@ -516,7 +515,7 @@ def get_job_output_transmission(job, output_id, is_reference): def get_job_results_response( job, # type: Job - *, # type: Any + *, # force named keyword arguments after container, # type: AnySettingsContainer headers=None, # type: Optional[AnyHeadersContainer] results_headers=None, # type: Optional[AnyHeadersContainer] @@ -839,7 +838,7 @@ def get_job_results_links( references, # type: Dict[str, ExecutionResultValue] transmissions, # type: Dict[str, Tuple[AnyExecuteTransmissionMode, JobValueFormat]] headers, # type: AnyHeadersContainer - *, # type: Any + *, # force named keyword arguments after settings, # type: SettingsType ): # type: (...) -> AnyHeadersContainer """ @@ -860,7 +859,7 @@ def get_job_results_single( output_id, # type: str output_format, # type: Optional[JobValueFormat] headers, # type: AnyHeadersContainer - *, # type: Any + *, # force named keyword arguments after settings, # type: AnySettingsContainer ): # type: (...) 
-> Union[HTTPOk, HTTPNoContent] """ @@ -881,6 +880,7 @@ def get_job_results_single( is_ref = bool(get_any_value(result, key=True, file=True, data=False)) out_data = get_any_value(result, file=is_ref, data=not is_ref) out_mode, out_fmt = get_job_output_transmission(job, output_id, is_ref) + output_format = output_format or out_fmt if out_mode == ExecuteTransmissionMode.REFERENCE: link = make_result_link(job, result, output_id, out_mode, output_format, settings=settings) headers.extend([("Link", link[0])]) @@ -932,13 +932,13 @@ def get_job_results_document(job, results, *, settings): def make_result(result, result_id, output_id): # type: (ExecutionResultValue, str, str) -> Union[AnyValueType, ExecutionResultObject] if isinstance(result, dict): - key = get_any_value(result, key=True) + is_ref = bool(get_any_value(result, key=True, file=True, data=False)) val = get_any_value(result) else: - key = "value" + is_ref = False val = result result = {"value": val} - out_mode, out_fmt = get_job_output_transmission(job, result_id, is_reference=(key == "href")) + out_mode, out_fmt = get_job_output_transmission(job, result_id, is_reference=is_ref) headers, data = generate_or_resolve_result(job, result, result_id, output_id, out_mode, out_fmt, settings) if data is None: ref = { @@ -1026,8 +1026,8 @@ def add_result_parts(result_parts): } yield res_id, (None, sub_multi, None, sub_headers) - key = get_any_value(result, key=True) - out_mode, out_fmt = get_job_output_transmission(job, out_id, is_reference=(key == "href")) + is_ref = bool(get_any_value(result, key=True, file=True, data=False)) + out_mode, out_fmt = get_job_output_transmission(job, out_id, is_reference=is_ref) res_headers, res_data = generate_or_resolve_result(job, result, res_id, out_id, out_mode, out_fmt, settings) c_type = res_headers.get("Content-Type") c_loc = res_headers.get("Content-Location") From a7e579272f463222c4d933c6d59976c3aea46981 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 
2024 01:21:51 -0400 Subject: [PATCH 58/75] fix docs lint --- docs/source/processes.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 20826c7f2..204dc13ca 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -727,8 +727,8 @@ Execution Mode ~~~~~~~~~~~~~~~~~~~~~ In order to select how to execute a :term:`Process`, either |synchronously|_ or |asynchronously|_, the ``Prefer`` header -should be specified. If omitted, `Weaver` defaults to |asynchronous|_ execution. To execute |asynchronously|_ explicitly, -``Prefer: respond-async`` should be used. Otherwise, the |synchronous|_ execution can be requested +should be specified. If omitted, `Weaver` defaults to |asynchronous|_ execution. To execute |asynchronously|_ +explicitly, ``Prefer: respond-async`` should be used. Otherwise, the |synchronous|_ execution can be requested with ``Prefer: wait=X`` where ``X`` is the duration in seconds to wait for a response. If no worker becomes available within that time, or if this value is greater than the ``weaver.execute_sync_max_wait`` setting (see :ref:`detail `), the :term:`Job` will @@ -1005,7 +1005,7 @@ Following is a detailed listing of the expected response structure according to the :ref:`Multipart Results ` structure using ``multipart`` contents (:rfc:`2046#section-5.1`) is employed by default, unless *all* requested outputs resolve to a :ref:`File Reference `. In such case, the references will be contained - in ``Link`` headers, similar to the |res-ref|_ response, but with multiple links for all requested outputs. + in ``Link`` headers, similar to the |res-link|_ response, but with multiple links for all requested outputs. 
When resolved as ``multipart``, the representation of each part (as literal data or link reference [#resValRef]_) is established by the ``transmissionMode`` parameter combinations, or as applicable according to the ``Accept`` From 1669b5b00a14ce4a1783ceaea796b83f2c49b113 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 10:17:47 -0400 Subject: [PATCH 59/75] fix doc links --- docs/source/processes.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 204dc13ca..da09da485 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -22,10 +22,10 @@ .. |synchronously| replace:: *synchronously* .. |asynchronous| replace:: *asynchronous* .. |asynchronously| replace:: *asynchronously* -.. _synchronous: processes.html#execution-mode -.. _synchronously: processes.html#execution-mode -.. _asynchronous: processes.html#execution-mode -.. _asynchronously: processes.html#execution-mode +.. _synchronous: `proc_exec_mode`_ +.. _synchronously: `proc_exec_mode`_ +.. _asynchronous: `proc_exec_mode`_ +.. _asynchronously: `proc_exec_mode`_ ********** Processes @@ -561,8 +561,8 @@ inputs/outputs-level metadata can be updated. Similarly, the following request would produce a ``MINOR`` revision of ``test-process``. Since both ``PATCH`` and ``MINOR`` level contents are defined for update, the higher ``MINOR`` revision is required. In this case ``MINOR`` is -required because ``jobControlOptions`` (forced to asynchronous execution for following versions) would break any future -request made by users that would expect the :term:`Process` to run (or support) synchronous execution. +required because ``jobControlOptions`` (forced to |asynchronous|_ execution for following versions) would break any +future request made by users that would expect the :term:`Process` to run (or support) |synchronous|_ execution. 
Notice that this time, the :term:`Process` reference does not indicate the revision in the path (no ``:1.2.4`` part). This automatically resolves to the updated revision ``test-process:1.2.4`` that became the new latest revision following @@ -919,10 +919,10 @@ Following is a detailed listing of the expected response structure according to .. |res-auto| replace:: *using automatic resolution of data/link representation* .. |res-data| replace:: Results for a Single Output with Data -.. _res-data: processes.html#job-results-raw-single-data +.. _res-data: `job-results-raw-single-data`_ .. |res-link| replace:: Results for a Single Output with Link -.. _res-link: processes.html#job-results-raw-single-ref +.. _res-link: `job-results-raw-single-ref`_ .. important:: Typically, clients will not use ``Prefer`` header and ``response``/``transmissionMode`` body parameters @@ -2091,7 +2091,7 @@ simplified data access mechanism without having to deal will all possible combin potentially returned by :ref:`proc_exec_results`. .. |output_netcdf| replace:: ``output_netcdf.nc`` -.. _output_netcdf: processes.html#job-outputs-mapping +.. _output_netcdf: `job-outputs-mapping`_ .. _proc_op_job_results: From b7c0bc51c631a4dd8ec1666143bdc1aa2253d5f3 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 11:46:16 -0400 Subject: [PATCH 60/75] update conformance and changelog --- CHANGES.rst | 36 +++++++++++++++++--- weaver/wps_restapi/api.py | 71 ++++++++++++++++++++++++++------------- 2 files changed, 80 insertions(+), 27 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 67682a3bf..d09e21024 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,13 +12,41 @@ Changes Changes: -------- +- Add support of ``response: raw`` execution request body parameter as alternative to ``response: document``, + which allows directly returning the result contents or ``Link`` headers rather than embedding them in a `JSON` + response (fixes `#376 `_).
+- Add support of ``Prefer: return=minimal`` and ``Prefer: return=representation`` header as alternative method + to request the ``response: document`` and ``response: raw`` parameters + (fixes `#414 `_). + Minor differences exist according to supplied ``transmissionMode`` and the original data/link results. + See `Process Execution `_ + documentation for details. +- Add support of ``outputs`` execution request body parameter to filter returned outputs from + the ``GET /jobs/{jobId}/results`` (async) or returned directly (sync) from ``POST /processes/{processId}/execution`` + (fixes `#380 `_). +- Add support of ``Accept: multipart/*`` and ``Accept: multipart/mixed`` when submitting an execution to obtain + the results as multiple parts embedded within the response contents. Parts are represented with their default + data/link representation, unless overridden by corresponding ``transmissionMode`` per output ID. - Add ``output_links``/``-oL`` parameter to Python client and CLI to retrieve ``Link`` headers as `Job` results. Due to the multiple ``Link`` headers returned by `Job` results, this cannot be performed automatically without the assumption of which ``rel`` links correspond to actual output IDs to extract. - -Fixes: ------- -- No change. +- Update documentation with a mapping of *Process Execution Results* according to + submitted ``response`` body parameter (*OGC API - Processes v1.0*), + the ``Prefer: return`` header (*OGC API - Processes v2.0*), the requested ``Accept`` header, + and any relevant ``transmissionMode`` request body overrides per filtered ``outputs``. + +Fixes: +------ +- Fix ``GET /jobs/{jobId}/inputs`` contents to correctly return the submitted ``outputs`` definition + for `Process` execution (fixes `#715 `_). +- Fix missing ``Link`` header with ``rel: monitor`` relationship in the created `Job` responses + (fixes `#596 `_).
+- Fix missing ``/rec/core/link-header`` definition in ``GET /conformance`` response reporting + that ``Link`` headers are returned for corresponding references of a given request + (fixes `#378 `_). +- Fix ``transmissionMode: value`` that was ignored for ``response: document`` if the output was represented by default + as a *complex* file URL, and ``transmissionMode: reference`` that was ignored if the output was *literal* data. + The ``transmissionMode`` will now return the appropriate inline data or URL as requested. .. _changes_5.9.0: diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index a736df12b..7425399d8 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -116,7 +116,7 @@ def get_conformance(category, settings): f"{ogcapi_common}/rec/core/html", ] if ogcapi_proc_html else []) + [ f"{ogcapi_common}/rec/core/json", - # ogcapi_common + "/rec/core/link-header", + f"{ogcapi_common}/rec/core/link-header", # FIXME: error details (for all below: https://github.com/crim-ca/weaver/issues/320) # ogcapi_common + "/rec/core/problem-details", # ogcapi_common + "/rec/core/query-param-capitalization", @@ -196,8 +196,9 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/conf/core/job-results-exception-no-such-job", f"{ogcapi_proc_core}/conf/core/job-results-exception-results-not-ready", f"{ogcapi_proc_core}/conf/core/job-results-failed", - f"{ogcapi_proc_core}/conf/core/job-results-param-outputs", - f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-empty", + # FIXME: results 'outputs' query parameter (https://github.com/crim-ca/weaver/issues/733) + # f"{ogcapi_proc_core}/conf/core/job-results-param-outputs", + # f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-empty", f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-omit", # FIXME: https://github.com/crim-ca/weaver/issues/380 # f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-response", @@ -216,20 +217,18 @@ def get_conformance(category, 
settings): f"{ogcapi_proc_core}/conf/core/process-execute-default-outputs", f"{ogcapi_proc_core}/conf/core/process-execute-input-array", f"{ogcapi_proc_core}/conf/core/process-execute-input-inline-bbox", - # FIXME: support byte/binary type (string + format:byte) ? - # f"{ogcapi_proc_core}/conf/core/process-execute-input-inline-binary", + f"{ogcapi_proc_core}/conf/core/process-execute-input-inline-binary", f"{ogcapi_proc_core}/conf/core/process-execute-input-inline-mixed", f"{ogcapi_proc_core}/conf/core/process-execute-input-inline-object", f"{ogcapi_proc_core}/conf/core/process-execute-input-validation", f"{ogcapi_proc_core}/conf/core/process-execute-inputs", - f"{ogcapi_proc_core}/conf/core/process-execute-input-validation", f"{ogcapi_proc_core}/conf/core/process-execute-op", f"{ogcapi_proc_core}/conf/core/process-execute-request", f"{ogcapi_proc_core}/conf/core/process-execute-success-async", - f"{ogcapi_proc_core}/conf/core/process-execute-sync-many-json", # FIXME: https://github.com/crim-ca/weaver/issues/18 # f"{ogcapi_proc_core}/conf/core/process-execute-sync-one", - # f"{ogcapi_proc_core}/conf/core/process-execute-sync-default-content", + f"{ogcapi_proc_core}/conf/core/process-execute-sync-default-content", + f"{ogcapi_proc_core}/conf/core/process-execute-sync-many-json", f"{ogcapi_proc_core}/conf/core/process-list", f"{ogcapi_proc_core}/conf/core/process-list-op", f"{ogcapi_proc_core}/conf/core/process-list-success", @@ -248,6 +247,8 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/conf/json/content", f"{ogcapi_proc_core}/conf/json/definition", f"{ogcapi_proc_core}/conf/job-list", + # FIXME: KVP exec (https://github.com/crim-ca/weaver/issues/607, https://github.com/crim-ca/weaver/issues/445) + # f"{ogcapi_proc_core}/conf/kvp-execute", f"{ogcapi_proc_core}/conf/oas30", # FIXME: https://github.com/crim-ca/weaver/issues/231 # List all supported requirements, recommendations and abstract tests @@ -274,14 +275,13 @@ def get_conformance(category, 
settings): f"{ogcapi_proc_core}/rec/core/http-head", f"{ogcapi_proc_core}/rec/core/job-status", f"{ogcapi_proc_core}/rec/core/job-results-async-many-json-prefer-none", - # FIXME: https://github.com/crim-ca/weaver/issues/414 - # f"{ogcapi_proc_core}/rec/core/job-results-async-many-json-prefer-minimal", - # f"{ogcapi_proc_core}/rec/core/job-results-async-many-json-prefer-representation", - # f"{ogcapi_proc_core}/per/core/job-results-async-many-other-formats", + f"{ogcapi_proc_core}/rec/core/job-results-async-many-json-prefer-minimal", + f"{ogcapi_proc_core}/rec/core/job-results-async-many-json-prefer-representation", + f"{ogcapi_proc_core}/per/core/job-results-async-many-other-formats", f"{ogcapi_proc_core}/rec/core/process-execute-sync-many-json-prefer-none", - # f"{ogcapi_proc_core}/rec/core/process-execute-sync-many-json-prefer-minimal", - # f"{ogcapi_proc_core}/rec/core/process-execute-sync-many-json-prefer-representation", - # f"{ogcapi_proc_core}/rec/core/link-header", + f"{ogcapi_proc_core}/rec/core/process-execute-sync-many-json-prefer-minimal", + f"{ogcapi_proc_core}/rec/core/process-execute-sync-many-json-prefer-representation", + f"{ogcapi_proc_core}/rec/core/link-header", f"{ogcapi_proc_core}/rec/core/ogc-process-description", f"{ogcapi_proc_core}/rec/core/problem-details", f"{ogcapi_proc_core}/rec/core/process-execute-handle-prefer", @@ -311,12 +311,19 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/req/core/job-results-failed", f"{ogcapi_proc_core}/req/core/job-results", f"{ogcapi_proc_core}/req/core/job-results-async-document", - # FIXME: support raw multipart (https://github.com/crim-ca/weaver/issues/376) - # f"{ogcapi_proc_core}/req/core/job-results-async-raw-mixed-multi", + f"{ogcapi_proc_core}/req/core/job-results-async-many", + # FIXME: /results/{id} (https://github.com/crim-ca/weaver/issues/18) + # f"{ogcapi_proc_core}/req/core/job-results-async-one", + f"{ogcapi_proc_core}/req/core/job-results-async-raw-mixed-multi", 
f"{ogcapi_proc_core}/req/core/job-results-async-raw-ref", - # f"{ogcapi_proc_core}/req/core/job-results-async-raw-value-multi", + f"{ogcapi_proc_core}/req/core/job-results-async-raw-value-multi", f"{ogcapi_proc_core}/req/core/job-results-async-raw-value-one", f"{ogcapi_proc_core}/req/core/job-results-success-sync", + # FIXME: results 'outputs' query parameter (https://github.com/crim-ca/weaver/issues/733) + # f"{ogcapi_proc_core}/req/core/job-results-param-outputs", + # f"{ogcapi_proc_core}/req/core/job-results-param-outputs-empty", + f"{ogcapi_proc_core}/req/core/job-results-param-outputs-omit", + # f"{ogcapi_proc_core}/req/core/job-results-param-outputs-response", f"{ogcapi_proc_core}/req/core/job-success", f"{ogcapi_proc_core}/req/core/landingpage-op", f"{ogcapi_proc_core}/req/core/landingpage-success", @@ -329,8 +336,7 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/req/core/process-execute-default-outputs", f"{ogcapi_proc_core}/req/core/process-execute-input-array", f"{ogcapi_proc_core}/req/core/process-execute-input-inline-bbox", - # FIXME: support byte/binary type (string + format:byte) ? 
- # f"{ogcapi_proc_core}/req/core/process-execute-input-inline-binary", + f"{ogcapi_proc_core}/req/core/process-execute-input-inline-binary", f"{ogcapi_proc_core}/req/core/process-execute-input-mixed-type", f"{ogcapi_proc_core}/req/core/process-execute-input-inline-object", f"{ogcapi_proc_core}/req/core/process-execute-input-validation", @@ -339,10 +345,9 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/req/core/process-execute-request", f"{ogcapi_proc_core}/req/core/process-execute-success-async", f"{ogcapi_proc_core}/req/core/process-execute-sync-document", - # f"{ogcapi_proc_core}/req/core/process-execute-sync-raw-mixed-multi", + f"{ogcapi_proc_core}/req/core/process-execute-sync-raw-mixed-multi", f"{ogcapi_proc_core}/req/core/process-execute-sync-raw-ref", - # FIXME: support raw multipart (https://github.com/crim-ca/weaver/issues/376) - # f"{ogcapi_proc_core}/req/core/process-execute-sync-raw-value-multi", + f"{ogcapi_proc_core}/req/core/process-execute-sync-raw-value-multi", f"{ogcapi_proc_core}/req/core/process-execute-sync-raw-value-one", f"{ogcapi_proc_core}/req/core/pl-limit-definition", f"{ogcapi_proc_core}/req/core/pl-limit-response", @@ -370,6 +375,26 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/req/job-list/processid-response", f"{ogcapi_proc_core}/req/job-list/type-definition", f"{ogcapi_proc_core}/req/job-list/type-response", + # FIXME: KVP exec (https://github.com/crim-ca/weaver/issues/607, https://github.com/crim-ca/weaver/issues/445) + # f"{ogcapi_proc_core}/req/kvp-execute", + # f"{ogcapi_proc_core}/req/kvp-execute/process-execute-op", + # f"{ogcapi_proc_core}/req/kvp-execute/f-definition", + # f"{ogcapi_proc_core}/req/kvp-execute/f-response", + # f"{ogcapi_proc_core}/req/kvp-execute/prefer-definition", + # f"{ogcapi_proc_core}/req/kvp-execute/input-query-parameters", + # f"{ogcapi_proc_core}/req/kvp-execute/input-query-parameter-values", + # f"{ogcapi_proc_core}/req/kvp-execute/string-input-value", + # 
f"{ogcapi_proc_core}/req/kvp-execute/numeric-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/boolean-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/complex-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/array-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/binary-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/binary-input-value-qualified", + # f"{ogcapi_proc_core}/req/kvp-execute/bbox-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/bbox-crs-input-value", + # f"{ogcapi_proc_core}/req/kvp-execute/input-by-reference", + # f"{ogcapi_proc_core}/req/kvp-execute/input-cardinality", + # f"{ogcapi_proc_core}/req/kvp-execute/output", f"{ogcapi_proc_core}/req/oas30", # OpenAPI 3.0 f"{ogcapi_proc_core}/req/oas30/completeness", f"{ogcapi_proc_core}/req/oas30/exceptions-codes", From 78e039db2c5df0cd8f776c72ae02664c0fc9e02c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 12:09:39 -0400 Subject: [PATCH 61/75] update docs about transmissionMode and filtered outputs (relates to #380) --- .../job_execute_outputs_transmission.json | 8 ++++ docs/source/processes.rst | 39 ++++++++++++------- weaver/wps_restapi/api.py | 3 +- 3 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 docs/examples/job_execute_outputs_transmission.json diff --git a/docs/examples/job_execute_outputs_transmission.json b/docs/examples/job_execute_outputs_transmission.json new file mode 100644 index 000000000..331bf3c63 --- /dev/null +++ b/docs/examples/job_execute_outputs_transmission.json @@ -0,0 +1,8 @@ +{ + "inputs": {"<...>": "<...>"}, + "outputs": { + "output-default": {}, + "output-by-value": {"transmissionMode": "value"}, + "output-by-ref": {"transmissionMode": "reference"} + } +} diff --git a/docs/source/processes.rst b/docs/source/processes.rst index da09da485..6bb45073d 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -699,27 +699,36 @@ The ``outputs`` section defines, for each ``id`` 
available from the :term:`Proce report the produced outputs from a successful :term:`Job` execution. The method under which each output will be returned depends on the negotiated :ref:`proc_exec_mode` and :ref:`proc_exec_results`. -When an output corresponds to a file produced by the :term:`Application Package`, and stored locally, the -result will typically (unless requested otherwise), be exposed externally using the returned reference :term:`URL`. +When an output corresponds to a :ref:`File Reference ` produced by the :term:`Application Package`, +and stored locally, the result will typically (unless requested otherwise), be exposed externally using the returned +reference :term:`URL`. For outputs that correspond to literal data, such as plain strings or numbers, `Weaver` will typically prefer returning the ``value`` directly. However, alternate link representations can also be obtained if specified in the -execution request. +execution request, using ``transmissionMode`` overrides for the desired outputs. -When the ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed should return all -outputs it offers in the created :ref:`Job Results `. In such case, because no representation modes -is specified for individual outputs, `Weaver` automatically selects ``reference`` for files as it makes all outputs -more easily accessible with distinct :term:`URL` afterwards, and ``values`` for literal data to obtain them directly. -If the ``outputs`` section is specified, but that one of the ``outputs`` defined in +When the ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed +should return *all* outputs it offers in the created :ref:`Job Results `. +If the ``outputs`` section is specified, but that one of the *requested outputs* [#outN]_ defined in the :ref:`Process Description ` is not specified, this indicates that the :term:`Job` should -omit this output from the produced results. 
+omit this output from the produced results. When *requested outputs* are specified without any ``transmissionMode``, +the ``reference`` representation is used automatically for :ref:`File Reference ` as it makes all +outputs more easily accessible with distinct :term:`URL` afterwards, and ``value`` is used for literal data to +obtain them directly (inline in the response). Opposite ``value``/``reference`` representations can be requested +explicitly, for each respective output, using the ``transmissionMode`` as presented below. -.. fixme: -.. todo:: - Filtering of ``outputs`` not implemented (everything always available). - https://github.com/crim-ca/weaver/issues/380 +.. warning:: + When using ``outputs`` in the request body, one necessarily introduces filtering indications of results to + be returned. If *all* outputs are desired, some of which override ``transmissionMode`` and others letting + their representation auto-resolve, explicit ``{}`` mapping must be indicated to avoid filtering them out. + +.. literalinclude:: ../examples/job_execute_outputs_transmission.json + :language: json + :caption: Requesting Filtered Outputs with Transmission Mode Overrides + :name: job-execute-outputs-transmission -Other parameters presented in the above examples, namely ``mode`` and ``response`` are further detailed in -the following :ref:`proc_exec_mode` and :ref:`proc_exec_results` sections. +When ``transmissionMode`` is specified for a given output, its result representation will override any other +parameter that would otherwise affect its automatic or "informed" resolution of the output representation. +These parameters are further detailed in the following :ref:`proc_exec_mode` and :ref:`proc_exec_results` sections. .. 
_proc_exec_mode: diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 7425399d8..8a8f80bd6 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -200,8 +200,7 @@ def get_conformance(category, settings): # f"{ogcapi_proc_core}/conf/core/job-results-param-outputs", # f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-empty", f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-omit", - # FIXME: https://github.com/crim-ca/weaver/issues/380 - # f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-response", + f"{ogcapi_proc_core}/conf/core/job-results-param-outputs-response", f"{ogcapi_proc_core}/conf/core/job-results-success-sync", f"{ogcapi_proc_core}/conf/core/job-success", f"{ogcapi_proc_core}/conf/core/landingpage-op", From ef7670ef507b40005978427a9e9bbc1c3d0893c4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 12:28:18 -0400 Subject: [PATCH 62/75] add --output-filter CLI option (relates to #380) --- CHANGES.rst | 8 +++++--- weaver/cli.py | 16 +++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d09e21024..d734c2e08 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -27,9 +27,11 @@ Changes: - Add support of ``Accept: multipart/*`` and ``Accept: multipart/mixed`` when submitting an execution to obtain the results as multiple parts embedded within the response contents. Parts are represented with their default data/link representation, unless overridden by corresponding ``transmissionMode`` per output ID. -- Add ``output_links``/``-oL`` parameter to Python client and CLI to retrieve ``Link`` headers as `Job` results. - Due to the multiple ``Link`` headers returned by `Job` results, this cannot be performed automatically without - the assumption of which ``rel`` links correspond to actual output IDs to extract. 
+- Add ``output_links``/``-oL``/``--output-link`` parameter to Python client and CLI to retrieve ``Link`` headers + as `Job` results. Due to the multiple ``Link`` headers returned by `Job` results, this cannot be performed + automatically without the assumption of which ``rel`` links correspond to actual output IDs to extract. +- Add ``output_filter``/``-oF``/``--output-filter`` parameter to Python client and CLI to indicate + any ``outputs`` to be filtered when submitting the `Process` execution. - Update documentation with a mapping of *Process Execution Results* according to submitted ``response`` body parameter (*OGC API - Processes v1.0*), the ``Prefer: return`` header (*OGC API - Processes v2.0*), the requested ``Accept`` header, diff --git a/weaver/cli.py b/weaver/cli.py index 729946e8e..990888107 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -2772,10 +2772,6 @@ def make_parser(): Example: ``-I message='Hello Weaver' -I value:int=1234 -I file:File=data.xml@mediaType=text/xml`` """) ) - # FIXME: allow filtering 'outputs' (https://github.com/crim-ca/weaver/issues/380) - # Only specified ones are returned, if none specified, return all. - # op_execute.add_argument( - # "-O", "--output", op_execute.add_argument( "-R", "--ref", "--reference", metavar="REFERENCE", dest="output_refs", action="append", help=inspect.cleandoc(""" @@ -2795,6 +2791,14 @@ def make_parser(): Example: ``-R output-one -R output-two`` """) ) + op_execute.add_argument( + "-oF", "--output-filter", metavar="OUTPUT", dest="output_filter", nargs=1, action="append", + help=( + "Output ID to be omitted in the submitted process execution. " + "Subsequent results of the corresponding job will omit the specified output in the responses. " + "The option Can be specified multiple times for multiple outputs to be filtered out." 
+ ) + ) op_execute_output_context = op_execute.add_mutually_exclusive_group() op_execute_output_context.add_argument( "-oP", "--output-public", dest="output_context", const="public", action="store_const", @@ -2946,8 +2950,10 @@ def make_parser(): help="Output directory where to store downloaded files from job results if requested " "(default: ``${CURDIR}/{JobID}/``)." ) + # FIXME: support filtering outputs on 'jobs/{jobId}/results/{id}' (https://github.com/crim-ca/weaver/issues/18) + # reuse same '-oF' parameter as for 'outputs' submitted during 'execute' operation op_results.add_argument( - "-oL", "--output-link", dest="output_links", nargs="+", + "-oL", "--output-link", dest="output_links", nargs=1, action="append", help="Output IDs in 'Link' headers to retrieve as results for matching relationship ('rel') links." ) From cd055242c002cb22ad7b43c3a7efb792e26d8234 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 13:53:45 -0400 Subject: [PATCH 63/75] update Preference-Applied header with requested Prefer return if applicable --- CHANGES.rst | 2 + tests/functional/test_wps_package.py | 108 +++++++++++++++++++++------ weaver/execute.py | 36 +++++++++ weaver/processes/execution.py | 17 ++++- weaver/wps_restapi/jobs/utils.py | 20 +++-- 5 files changed, 155 insertions(+), 28 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d734c2e08..71632904c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -32,6 +32,8 @@ Changes: automatically without the assumption of which ``rel`` links correspond to actual output IDs to extract. - Add ``output_filter``/``-oF``/``--output-filter`` parameter to Python client and CLI to indicate any ``outputs`` to be filtered when submitting the `Process` execution. +- Update ``Preference-Applied`` header reported by execution responses to + include ``return=minimal`` or ``return=representation`` as applicable by the requested ``Prefer`` header. 
- Update documentation with a mapping of *Process Execution Results* according to submitted ``response`` body parameter (*OGC API - Processes v1.0*), the ``Prefer: return`` header (*OGC API - Processes v2.0*), the requested ``Accept`` header, diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index d79f04581..fdbda4999 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3604,8 +3604,9 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -3628,6 +3629,8 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) status_url = resp.json["location"] status = self.monitor_job(status_url, return_status=True) assert status["status"] == Status.SUCCEEDED + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") job_id = status["jobID"] results = self.app.get(f"/jobs/{job_id}/results") @@ -3647,8 +3650,9 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -3671,6 +3675,8 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) status_url = resp.json["location"] status = self.monitor_job(status_url, 
return_status=True) assert status["status"] == Status.SUCCEEDED + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") job_id = status["jobID"] out_url = get_wps_output_url(self.settings) @@ -3696,8 +3702,9 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_defau body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}; wait=5" # sync to allow direct content response exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Prefer": prefer_header, "Accept": ContentType.ANY, "Content-Type": ContentType.APP_JSON, } @@ -3716,6 +3723,8 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_defau resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -3745,9 +3754,10 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_json( p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) - + + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, wait=5" # sync to allow direct content response exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Prefer": prefer_header, "Accept": ContentType.APP_JSON, "Content-Type": ContentType.APP_JSON, } @@ -3766,6 +3776,8 @@ def 
test_execute_single_output_prefer_header_return_minimal_literal_accept_json( resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -3810,8 +3822,9 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_defau body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, wait=5" # sync to allow direct content response exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Prefer": prefer_header, # omitting or specifying 'Accept' any must result the same (default link), # but test it is handled explicitly since the header would be "found" when parsing "Accept": ContentType.ANY, @@ -3832,6 +3845,8 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_defau resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 204, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -3889,8 +3904,9 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = 
f"return={ExecuteReturnPreference.MINIMAL}, wait=5" # sync to allow direct content response exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, wait=5", # sync to allow direct content response + "Prefer": prefer_header, "Accept": ContentType.APP_JSON, "Content-Type": ContentType.APP_JSON, } @@ -3909,6 +3925,8 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -3993,8 +4011,9 @@ def test_execute_single_output_response_raw_value_complex(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -4013,6 +4032,8 @@ def test_execute_single_output_response_raw_value_complex(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4039,8 +4060,9 @@ def test_execute_single_output_response_raw_reference_literal(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = 
"respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -4059,6 +4081,8 @@ def test_execute_single_output_response_raw_reference_literal(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4095,8 +4119,9 @@ def test_execute_single_output_response_raw_reference_complex(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -4115,6 +4140,8 @@ def test_execute_single_output_response_raw_reference_complex(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4185,6 +4212,7 @@ def test_execute_single_output_multipart_accept_data(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" not in resp.headers # rely on location that should be provided to 
find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4260,6 +4288,7 @@ def test_execute_single_output_multipart_accept_link(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4333,6 +4362,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4408,6 +4438,7 @@ def test_execute_single_output_response_document_alt_format_yaml(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4487,6 +4518,7 @@ def test_execute_single_output_response_document_alt_format_json_raw_literal(sel resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ 
-4569,6 +4601,7 @@ def test_execute_single_output_response_document_default_format_json_special(sel resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4619,10 +4652,11 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): # No 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header # without 'Accept' using multipart, it is expected that JSON document is used # Also, use 'Prefer: wait' to avoid 'respond-async', since async always respond with the Job status. + prefer_header = "wait=5" exec_headers = { "Accept": multipart_header, "Content-Type": ContentType.APP_JSON, - "Prefer": "wait=5", + "Prefer": prefer_header, } exec_content = { "inputs": { @@ -4643,6 +4677,8 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -4746,10 +4782,11 @@ def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { "Accept": f"{ContentType.MULTIPART_MIXED}, {ContentType.APP_JSON}", "Content-Type": ContentType.APP_JSON, - "Prefer": "respond-async", + "Prefer": 
prefer_header, } exec_content = { "inputs": { @@ -4766,6 +4803,8 @@ def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" assert resp.content_type == ContentType.APP_JSON, "Expect JSON instead of Multipart because of error." assert "status" in resp.json, "Expected a JSON Job Status response." + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") def test_execute_multi_output_prefer_header_return_representation(self): proc = "EchoResultsTester" @@ -4773,8 +4812,9 @@ def test_execute_multi_output_prefer_header_return_representation(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async" exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}, respond-async", + "Prefer": prefer_header, "Content-Type": ContentType.APP_JSON, } exec_content = { @@ -4796,6 +4836,8 @@ def test_execute_multi_output_prefer_header_return_representation(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4845,8 +4887,9 @@ def test_execute_multi_output_response_raw_value(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -4866,6 
+4909,8 @@ def test_execute_multi_output_response_raw_value(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4923,8 +4968,9 @@ def test_execute_multi_output_response_raw_reference_default_links(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -4944,6 +4990,8 @@ def test_execute_multi_output_response_raw_reference_default_links(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4998,10 +5046,11 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): # No 'response' nor 'Prefer: return' to ensure resolution is done by 'Accept' header # without 'Accept' using multipart, it is expected that JSON document is used # Also, use 'Prefer: wait' to avoid 'respond-async', since async always respond with the Job status. 
+ prefer_header = "wait=5" exec_headers = { "Accept": ContentType.MULTIPART_MIXED, "Content-Type": ContentType.APP_JSON, - "Prefer": "wait=5", + "Prefer": prefer_header, } exec_content = { "inputs": { @@ -5019,6 +5068,8 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # rely on location that should be provided to find the job ID results_url = get_header("Content-Location", resp.headers) @@ -5070,8 +5121,9 @@ def test_execute_multi_output_response_raw_mixed(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -5092,6 +5144,8 @@ def test_execute_multi_output_response_raw_mixed(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -5155,8 +5209,9 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, respond-async" exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, 
respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -5178,6 +5233,8 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") status_url = resp.json["location"] status = self.monitor_job(status_url, return_status=True) @@ -5220,8 +5277,9 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, respond-async" exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -5244,6 +5302,8 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") status_url = resp.json["location"] status = self.monitor_job(status_url, return_status=True) @@ -5293,8 +5353,9 @@ def test_execute_multi_output_response_document_defaults(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, respond-async" exec_headers = { - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + "Prefer": prefer_header } 
exec_headers.update(self.json_headers) exec_content = { @@ -5316,6 +5377,8 @@ def test_execute_multi_output_response_document_defaults(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") status_url = resp.json["location"] status = self.monitor_job(status_url, return_status=True) @@ -5354,8 +5417,9 @@ def test_execute_multi_output_response_document_mixed(self): body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + prefer_header = "respond-async" exec_headers = { - "Prefer": "respond-async" + "Prefer": prefer_header } exec_headers.update(self.json_headers) exec_content = { @@ -5379,6 +5443,8 @@ def test_execute_multi_output_response_document_mixed(self): resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") status_url = resp.json["location"] status = self.monitor_job(status_url, return_status=True) diff --git a/weaver/execute.py b/weaver/execute.py index 37ca6f3eb..1e7f60c26 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -9,6 +9,7 @@ if TYPE_CHECKING: from typing import List, Optional, Tuple, Union + from weaver.datatype import Job from weaver.typedefs import AnyHeadersContainer, HeadersType, Literal ExecutionModeAutoType = Literal["auto"] @@ -220,3 +221,38 @@ def parse_prefer_header_execute_mode( if wait: # default used, not a supplied preference return ExecuteMode.SYNC, wait, {} return ExecuteMode.ASYNC, None, {} + + +def 
update_preference_applied_return_header( + job, # type: Job + request_headers, # type: Optional[AnyHeadersContainer] + response_headers, # type: Optional[AnyHeadersContainer] +): # type: (...) -> AnyHeadersContainer + """ + Updates the ``Preference-Applied`` header according to available information. + + :param job: Job where the desired return preference has been resolved. + :param request_headers: Request headers from which the ``Prefer`` return preference is parsed. + :param response_headers: Response headers to update with the applied return preference, if any. + :return: Response headers, updated only if the requested return preference matches the one of the job. + """ + response_headers = response_headers or {} + + if not request_headers: + return response_headers + + request_prefer_return = parse_prefer_header_return(request_headers) + if not request_prefer_return: + return response_headers + + if job.execution_return != request_prefer_return: + return response_headers + + applied_prefer_header = get_header("Preference-Applied", response_headers) + if applied_prefer_header: + applied_prefer_header = f"return={request_prefer_return}; {applied_prefer_header}" + else: + applied_prefer_header = f"return={request_prefer_return}" + + response_headers.update({"Preference-Applied": applied_prefer_header}) + return response_headers diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index e12e5cd93..2ffae24de 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -1,3 +1,4 @@ +import copy import logging import os from time import sleep @@ -15,7 +16,12 @@ from weaver.database import get_db from weaver.datatype import Process, Service -from weaver.execute import ExecuteControlOption, ExecuteMode, parse_prefer_header_execute_mode +from weaver.execute import ( + ExecuteControlOption, + ExecuteMode, + parse_prefer_header_execute_mode, + update_preference_applied_return_header +) from weaver.formats import AcceptLanguage, ContentType, 
clean_media_type_format, map_cwl_media_type, repr_json from weaver.notify import map_job_subscribers, notify_job_subscribers from weaver.owsexceptions import OWSInvalidParameterValue, OWSNoApplicableCode @@ -795,6 +801,7 @@
def submit_job_handler(payload, # type: ProcessExecution is_execute_async = mode != ExecuteMode.SYNC accept_type = validate_job_accept_header(headers, mode) exec_resp, exec_return = get_job_return(job=None, body=json_body, headers=headers) # job 'None' since still parsing + req_headers = copy.deepcopy(headers or {}) get_header("prefer", headers, pop=True) # don't care about value, just ensure removed with any header container subscribers = map_job_subscribers(json_body, settings) @@ -826,7 +833,12 @@ def submit_job_handler(payload, # type: ProcessExecution # when sync is successful, it must return the results direct instead of status info # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response if job.status == Status.SUCCEEDED: - return get_job_results_response(job, headers=resp_headers, container=settings) + return get_job_results_response( + job, + request_headers=req_headers, + response_headers=resp_headers, + container=settings, + ) # otherwise return the error status body = job.json(container=settings) body["location"] = location_url @@ -849,6 +861,7 @@ def submit_job_handler(payload, # type: ProcessExecution "status": map_status(Status.ACCEPTED), "location": location_url } + resp_headers = update_preference_applied_return_header(job, req_headers, resp_headers) resp = get_job_submission_response(body, resp_headers) return resp diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 4e0835f88..b646c83a0 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -32,7 +32,13 @@ ServiceNotAccessible, ServiceNotFound ) -from weaver.execute import ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode, parse_prefer_header_return +from weaver.execute import ( + ExecuteResponse, + ExecuteReturnPreference, + ExecuteTransmissionMode, + parse_prefer_header_return, + update_preference_applied_return_header +) from weaver.formats import ContentEncoding, ContentType, get_format, 
repr_json from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema @@ -517,7 +523,8 @@ def get_job_results_response( job, # type: Job *, # force named keyword arguments after container, # type: AnySettingsContainer - headers=None, # type: Optional[AnyHeadersContainer] + request_headers=None, # type: Optional[AnyHeadersContainer] + response_headers=None, # type: Optional[AnyHeadersContainer] results_headers=None, # type: Optional[AnyHeadersContainer] results_contents=None, # type: Optional[JSON] ): # type: (...) -> AnyResponseType @@ -554,7 +561,8 @@ def get_job_results_response( :param job: Job for which to generate the results response, which contains the originally submitted parameters. :param container: Application settings. - :param headers: Additional headers to provide in the response. + :param request_headers: Original headers submitted to the request that leads to this response. + :param response_headers: Additional headers to provide in the response. :param results_headers: Headers that override originally submitted job parameters when requesting results. :param results_contents: Body contents that override originally submitted job parameters when requesting results. 
""" @@ -567,7 +575,6 @@ def get_job_results_response( # - test_execute_single_output_response_document_alt_format_yaml # - test_execute_single_output_multipart_accept_alt_format - # FIXME: remove any 'refs' not needed anymore results, _ = get_results( job, container, value_key="value", @@ -577,7 +584,7 @@ def get_job_results_response( link_references=False, ) - headers = ResponseHeaders(headers or {}) + headers = ResponseHeaders(response_headers or {}) headers.pop("Location", None) headers.setdefault("Content-Location", job.results_url(container)) for link in job.links(container, self_link="results"): @@ -600,6 +607,9 @@ def get_job_results_response( not is_rep and ContentType.APP_JSON not in job.accept_type # alternative way to request 'minimal'/'document' ) + headers = update_preference_applied_return_header(job, request_headers, headers) + + # document/minimal response if not is_raw and not is_accept_multipart and not is_single_output_minimal: try: results_schema = sd.ResultsDocument() From f69406d22440f2df238375c9e68b6b1d4aab3ce3 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 13:55:58 -0400 Subject: [PATCH 64/75] update missing docstring args --- weaver/execute.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/weaver/execute.py b/weaver/execute.py index 1e7f60c26..4f893593f 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -232,9 +232,9 @@ def update_preference_applied_return_header( Updates the ``Preference-Applied`` header according to available information. :param job: Job where the desired return preference has be resolved. - :param request_headers: - :param response_headers: - :return: + :param request_headers: Original request headers, to look for any ``Prefer: return``. + :param response_headers: Already generated response headers, to extend ``Preference-Applied`` header as needed. + :return: Updated response headers with any resolved return preference. 
""" response_headers = response_headers or {} From ae9e7a3b1893b1ba83fb98548c48b72ecfdd1a33 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 13:57:39 -0400 Subject: [PATCH 65/75] fix linting --- tests/functional/test_wps_package.py | 2 +- weaver/cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index fdbda4999..7ed6afc2c 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3754,7 +3754,7 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_json( p_id = self.fully_qualified_test_process_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) - + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, wait=5" # sync to allow direct content response exec_headers = { "Prefer": prefer_header, diff --git a/weaver/cli.py b/weaver/cli.py index 990888107..152bd85d4 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -2953,7 +2953,7 @@ def make_parser(): # FIXME: support filtering outputs on 'jobs/{jobId}/results/{id}' (https://github.com/crim-ca/weaver/issues/18) # reuse same '-oF' parameter as for 'outputs' submitted during 'execute' operation op_results.add_argument( - "-oL", "--output-link", dest="output_links", nargs=1, action="append", + "-oL", "--output-link", dest="output_links", nargs=1, action="append", help="Output IDs in 'Link' headers to retrieve as results for matching relationship ('rel') links." 
) From cd0d474b46b617d4cc97ea5b80ff404e68ff2fb9 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 15:17:58 -0400 Subject: [PATCH 66/75] ignore false-postivie doc8 D000 for indirect references (relates to https://github.com/PyCQA/doc8/issues/171) --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 5175060cf..4b450a14b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -108,6 +108,8 @@ exclude = [doc8] max-line-length = 120 ignore-path = docs/build,docs/source/autoapi +# FIXME: ignore false positives (https://github.com/PyCQA/doc8/issues/171) +ignore-path-errors = docs/source/processes.rst;D000, [docformatter] recursive = true From b3a7ca1e42daf12ecb0a825ed1c203eb48f2c6dd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 18:46:52 -0400 Subject: [PATCH 67/75] update doc8>=1.1.2 to allow 'ignore-path-errors' option --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index b915739b2..5125204be 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,7 +8,7 @@ bandit bump2version codacy-coverage coverage -doc8>=0.8.1 +doc8>=1.1.2 docformatter>=1.5.0 # add support of config file flake8<6 # FIXME: false positive redefinition (https://github.com/PyCQA/pyflakes/issues/757) flynt From 8078839f5f1cdfdc190ef74e60ceb84190c57782 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 19:00:45 -0400 Subject: [PATCH 68/75] update sphinx>7 in docs requirements to avoid conflict resolution against doc8 --- requirements-doc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-doc.txt b/requirements-doc.txt index 6d3b75a6d..9aaae95cf 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -4,7 +4,7 @@ # (see 'docs/source/conf.py') -r requirements.txt jinja2 -sphinx>=6,<8 +sphinx>=7,<8 sphinx-argparse!=0.5.0 sphinx-autoapi>=1.7.0 
sphinx-paramlinks>=0.4.1 From 3ae49ed2b95f1f8b5d4c56e2d11b379181e83cf8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 19:05:48 -0400 Subject: [PATCH 69/75] unpin sphinx_rtd_theme to allow sphinx/docutils compatibility --- requirements-doc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-doc.txt b/requirements-doc.txt index 9aaae95cf..d80938fcb 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -11,4 +11,4 @@ sphinx-paramlinks>=0.4.1 # adds redoc OpenAPI directly served on readthedocs sphinxcontrib-redoc>=1.6.0 sphinx_autodoc_typehints[type_comment]>=1.19 -sphinx_rtd_theme>=1.3.0,<2 +sphinx_rtd_theme>=1.3.0 From 2eca439dc0c1a9a17c40e53fd086d1aba0a34cf4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 19:49:20 -0400 Subject: [PATCH 70/75] docs and linting fixes --- docs/_static/custom.css | 2 +- docs/_templates/autoapi/index.rst | 4 +--- docs/source/package.rst | 8 ++++---- docs/source/processes.rst | 5 ++++- weaver/cli.py | 1 - 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 39f14d7f0..d6af635de 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -32,7 +32,7 @@ div[class^="highlight"] { } .table-exec-results thead { - background-color: #cccccc; + background-color: #CCCCCC; } .table-exec-results thead tr:nth-child(1) > th:nth-child(1), diff --git a/docs/_templates/autoapi/index.rst b/docs/_templates/autoapi/index.rst index ef9db0074..bde2180d1 100644 --- a/docs/_templates/autoapi/index.rst +++ b/docs/_templates/autoapi/index.rst @@ -8,8 +8,6 @@ This page contains reference documentation of the source code. .. 
toctree:: :titlesonly: - {% for page in pages %} - {% if page.top_level_object and page.display %} + {% for page in pages|selectattr("is_top_level_object") %} {{ page.include_path }} - {% endif %} {% endfor %} diff --git a/docs/source/package.rst b/docs/source/package.rst index 6aa34db0a..a41ace285 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -138,7 +138,7 @@ definitions to the :term:`I/O`, since those will generally be missing descriptiv For pure Python scripts not using |python-argparse|_, the |scriptcwl|_ utility can be considered instead. .. seealso:: - For Python code embedded in |jupyter-notebooks|_, refer to :ref:`app_pkg_jupyter_notebook` for more details. + For Python code embedded in a |jupyter-notebook|_, refer to :ref:`app_pkg_jupyter_notebook` for more details. .. |python-argparse| replace:: ``argparse`` .. _python-argparse: https://docs.python.org/3/library/argparse.html @@ -688,7 +688,7 @@ specific types will be presented in :ref:`cwl-type` and :ref:`cwl-dir` sections. | | | ``uri``, ``url``, | | | | | etc.) [#note5]_ | | +----------------------+-------------------------+------------------------+--------------------------------------------+ -| ``File`` | ``BoundingBox`` | :term:`JSON` [#note6]_ | Partial support available[#bbox-note]_. | +| ``File`` | ``BoundingBox`` | :term:`JSON` [#note6]_ | Partial support available [#noteBBOX]_. | +----------------------+-------------------------+------------------------+--------------------------------------------+ | ``File`` | ``Complex`` | :term:`JSON` [#note6]_ | :ref:`File Reference ` | | | | | with Media-Type validation and staging | @@ -722,7 +722,7 @@ specific types will be presented in :ref:`cwl-type` and :ref:`cwl-dir` sections. .. [#note6] Specific schema required as described in :ref:`oas_json_types`. -.. [#bbox-note] +.. 
[#noteBBOX] The :term:`WPS` data type ``BoundingBox`` has a schema definition in :term:`WPS` and :term:`OAS` contexts, but is not handled natively by :term:`CWL` types. When the conversion to a :term:`CWL` job occurs, an equivalent ``Complex`` type using a :term:`CWL` ``File`` with ``format: ogc-bbox`` and the contents stored as :term:`JSON` is @@ -757,7 +757,7 @@ expected to be a file reference. } A combination of ``supportedCRS`` objects providing ``crs`` references would -otherwise indicate a ``BoundingBox`` :term:`I/O` (see :ref:`note `). +otherwise indicate a ``BoundingBox`` :term:`I/O` (see :ref:`note `). .. code-block:: json :caption: WPS BoundingBox Data Type diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 6bb45073d..3d204ef04 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -408,6 +408,9 @@ Register a New Process (Deploy) Deployment of a new process is accomplished through the ``POST {WEAVER_URL}/processes`` |deploy-req|_ request. +.. seealso:: + |ogc-api-proc-part2|_ specification. + The request body requires mainly two components: - | ``processDescription``: @@ -1994,6 +1997,7 @@ of the polling-based method on the :ref:`Job Status ` endpoint o .. seealso:: Refer to the |oas-rtd|_ of the |exec-req|_ request for all available ``subscribers`` properties. +.. _proc_op_job_status: .. _proc_op_status: .. _proc_op_monitor: @@ -2033,7 +2037,6 @@ format is employed according to the chosen location. .. seealso:: For the :term:`WPS` endpoint, refer to :ref:`conf_settings`. -.. _proc_op_job_status: .. fixme: add example .. fixme: describe minimum fields and extra fields diff --git a/weaver/cli.py b/weaver/cli.py index 152bd85d4..d43272f54 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -742,7 +742,6 @@ def deploy(self, .. seealso:: - :ref:`proc_op_deploy` - - |ogc-api-proc-part2|_ :param process_id: Desired process identifier. 
From 6f7aba71763201132d840caaf33e0d3e5bbdc5de Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 9 Oct 2024 23:52:21 -0400 Subject: [PATCH 71/75] fix CLI test --- weaver/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weaver/cli.py b/weaver/cli.py index d43272f54..d4a9cf604 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -2791,7 +2791,7 @@ def make_parser(): """) ) op_execute.add_argument( - "-oF", "--output-filter", metavar="OUTPUT", dest="output_filter", nargs=1, action="append", + "-oF", "--output-filter", metavar="OUTPUT", dest="output_filter", nargs=1, help=( "Output ID to be omitted in the submitted process execution. " "Subsequent results of the corresponding job will omit the specified output in the responses. " @@ -2952,7 +2952,7 @@ def make_parser(): # FIXME: support filtering outputs on 'jobs/{jobId}/results/{id}' (https://github.com/crim-ca/weaver/issues/18) # reuse same '-oF' parameter as for 'outputs' submitted during 'execute' operation op_results.add_argument( - "-oL", "--output-link", dest="output_links", nargs=1, action="append", + "-oL", "--output-link", dest="output_links", nargs=1, help="Output IDs in 'Link' headers to retrieve as results for matching relationship ('rel') links." ) From d1ab8ba385c5666751f7f1183506dc3399543c0e Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 11:18:19 -0400 Subject: [PATCH 72/75] fix output results S3 bucket missing output ID in path --- CHANGES.rst | 1 + tests/functional/test_wps_package.py | 113 ++++++++++++++++++++++++++- tests/functional/utils.py | 5 ++ weaver/processes/wps_package.py | 16 ++-- 4 files changed, 129 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 890e34b7e..aa9d53b3a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -55,6 +55,7 @@ Fixes: (fixes `##620 `_). - Add the appropriate HTTP error type to respect ``/conf/dru/deploy/unsupported-content-type`` (fixes `#624 `_). 
+- Fix S3 bucket storage for result file missing the output ID in the path to match local WPS output storage structure. .. _changes_5.9.0: diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 7ed6afc2c..bfba6106f 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -5712,7 +5712,7 @@ def test_execute_with_directory_output(self): # check that outputs are S3 bucket references output_bucket = self.settings["weaver.wps_output_s3_bucket"] - output_loc = results["output_dir"]["href"] + output_loc = results[output_id]["href"] output_ref = f"{output_bucket}/{job_id}/{output_id}/" output_key_base = f"{job_id}/{output_id}/" output_ref_abbrev = f"s3://{output_ref}" @@ -5762,3 +5762,114 @@ def test_execute_with_directory_output(self): assert not os.path.exists(os.path.join(wps_outdir, job_id, output_id, out_file)) assert not os.path.exists(os.path.join(wps_outdir, wps_uuid, output_id, out_file)) assert os.path.isfile(os.path.join(wps_outdir, f"{job_id}.xml")) + + @mocked_aws_config + @mocked_aws_s3 + @setup_aws_s3_bucket(bucket="wps-output-test-bucket") + def test_execute_with_result_representations(self): + """ + Test that an output file stored in an AWS bucket can be retrieved as per their requested ``transmissionMode``. + + .. 
versionadded:: 6.0 + """ + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_process_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + with contextlib.ExitStack() as stack: + exec_body = { + "mode": ExecuteMode.SYNC, + "response": ExecuteResponse.DOCUMENT, + "inputs": {"message": "test data in bucket"}, + "outputs": { + "output_json": {"transmissionMode": ExecuteTransmissionMode.VALUE}, + "output_text": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}, + }, + } + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + proc_url = f"/processes/{p_id}/execution" + resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5, + data=exec_body, headers=self.json_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.content_type == ContentType.APP_JSON + + # rely on location that should be provided to find the job ID + results = resp.json + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + + out_path = f"/jobs/{job_id}/outputs" + out_params = {"schema": JobInputsOutputsSchema.OGC_STRICT} + out_resp = self.app.get(out_path, headers=self.json_headers, params=out_params) + outputs = out_resp.json + + # check that outputs by reference are S3 bucket references + # for 'outputs' endpoint, reference always expected for File type + # for 'results' endpoint, only the output requested by reference 'transmissionMode' is expected + for output_id, output_file, outputs_doc in [ + ("output_text", "result.txt", results), + ("output_text", "result.txt", outputs["outputs"]), + ("output_json", "result.json", outputs["outputs"]), + ]: + output_bucket = self.settings["weaver.wps_output_s3_bucket"] + output_loc = outputs_doc[output_id]["href"] + output_key = f"{job_id}/{output_id}/{output_file}" + output_ref = f"{output_bucket}/{output_key}" + output_ref_abbrev = f"s3://{output_ref}" + output_ref_full = f"https://s3.{MOCK_AWS_REGION}.amazonaws.com/{output_ref}" + output_ref_any = [output_ref_abbrev, output_ref_full] # allow any variant weaver can parse + assert output_loc in output_ref_any + + # check that result by 'transmissionMode' value is not a reference, but the contents + assert "output_json" in results + assert "value" in results["output_json"] + assert "href" not in results["output_json"] + assert results["output_json"]["value"] == {"data": "test data in bucket"} + assert results["output_json"]["mediaType"] == ContentType.APP_JSON + + # check that outputs are indeed stored in S3 buckets + mocked_s3 = boto3.client("s3", region_name=MOCK_AWS_REGION) + resp_json = mocked_s3.list_objects_v2(Bucket=output_bucket) + bucket_file_info = {obj["Key"]: obj for obj in resp_json["Contents"]} + expect_out_files = { + f"{job_id}/{output_id}/{output_file}": out_type + for output_id, output_file, out_type + in [ + 
("output_json", "result.json", ContentType.APP_JSON), + ("output_text", "result.txt", ContentType.TEXT_PLAIN), + ] + } + assert resp_json["Name"] == output_bucket + assert len(bucket_file_info) == len(expect_out_files), "No extra files expected." + assert all(out_file in bucket_file_info for out_file in expect_out_files) + + # validate that common file extensions could be detected and auto-populated the Content-Type + # (information not available in 'list_objects_v2', so fetch each file individually + for out_file, out_type in expect_out_files.items(): + out_info = mocked_s3.head_object(Bucket=output_bucket, Key=out_file) + assert out_info["ContentType"] == out_type + + # check that outputs are NOT copied locally, but that XML status does exist + # counter validate path with file always present to ensure outputs are not 'missing' because of wrong dir + wps_uuid = str(self.job_store.fetch_by_id(job_id).wps_id) + wps_outdir = self.settings["weaver.wps_output_dir"] + # NOTE: exception for 'output_json' since by-value representation forces it to retrieve it locally + exception_id = ["output_json"] + for out_file in list(expect_out_files): + out_path, out_name = os.path.split(out_file) + _, out_id = os.path.split(out_path) + assert not os.path.exists(os.path.join(wps_outdir, out_name)) + assert not os.path.exists(os.path.join(wps_outdir, job_id, out_name)) + assert not os.path.exists(os.path.join(wps_outdir, wps_uuid, out_name)) + assert not os.path.exists(os.path.join(wps_outdir, out_id, out_name)) + if out_id not in exception_id: + assert not os.path.exists(os.path.join(wps_outdir, job_id, out_id, out_name)) + assert not os.path.exists(os.path.join(wps_outdir, wps_uuid, out_id, out_name)) + assert os.path.isfile(os.path.join(wps_outdir, f"{job_id}.xml")) diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 5e4a0583e..2eaea6880 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -447,6 +447,11 @@ def 
fully_qualified_test_process_name(self, name=""): test_name = test_name.replace(".", "-").replace("-_", "_").replace("_-", "-") return test_name + @overload + def monitor_job(self, status_url, **__): + # type: (str, **Any) -> ExecutionResults + ... + @overload def monitor_job(self, status_url, return_status=False, **__): # type: (str, Literal[True], **Any) -> JobStatusResponse diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index cb1361843..8b15fe818 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -2610,7 +2610,7 @@ def make_location_output(self, cwl_result, output_id, index=None): # Therefore, preemptively override "ComplexOutput._storage" to whichever location according to use case. # Override builder per output to allow distinct S3/LocalFile for it and XML status that should remain local. storage_type = STORE_TYPE.S3 if s3_bucket else STORE_TYPE.PATH - storage = self.make_location_storage(storage_type, result_type) + storage = self.make_location_storage(storage_type, result_type, output_id) self.response.outputs[output_id]._storage = storage # noqa: W0212 # pywps will resolve file paths for us using its WPS request UUID @@ -2703,13 +2703,14 @@ def resolve_output_format(output, result_path, result_cwl_format): if output.valid_mode != MODE.NONE and output.validator is emptyvalidator: output.data_format.validate = format_extension_validator - def make_location_storage(self, storage_type, location_type): - # type: (STORE_TYPE, PACKAGE_COMPLEX_TYPES) -> Union[FileStorage, S3Storage, DirectoryNestedStorage] + def make_location_storage(self, storage_type, location_type, output_id): + # type: (STORE_TYPE, PACKAGE_COMPLEX_TYPES, str) -> Union[FileStorage, S3Storage, DirectoryNestedStorage] """ Generates the relevant storage implementation with requested types and references. :param storage_type: Where to store the outputs. :param location_type: Type of output as defined by CWL package type. 
+ :param output_id: expected output identifier that will employ this storage. :return: Storage implementation. """ if location_type == PACKAGE_FILE_TYPE and storage_type == STORE_TYPE.PATH: @@ -2730,10 +2731,15 @@ def make_location_storage(self, storage_type, location_type): output_prefix = self.job.result_path(job_id=output_job_id) # pylint: disable=attribute-defined-outside-init # references to nested storage dynamically created if storage_type == STORE_TYPE.S3: - storage.prefix = output_prefix + # when using S3 storage, the 'prefix' is directly employed with the file name + # however, we want results to be nested under their output ID + # therefore, preemptively adjust the prefix to do as such + storage.prefix = os.path.join(output_prefix, output_id) else: + # when using other storage than S3, the 'target' is automatically built using a join + # of 'prefix' and the output ID stored in the parent result object containing this storage storage.target = os.path.join(storage.target, output_prefix) - storage.output_url = os.path.join(storage.output_url, output_prefix) + storage.output_url = os.path.join(str(storage.output_url), output_prefix) os.makedirs(storage.target, exist_ok=True) # pywps handles Job UUID dir creation, but not nested dirs return storage From 0fef2b693565936173005636451ca36dec35d17c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 13:48:14 -0400 Subject: [PATCH 73/75] fix missing test combinations/coverage for prefer header execution mode parsing --- tests/test_execute.py | 10 +++++++++- weaver/execute.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_execute.py b/tests/test_execute.py index d942fed31..69f8685f0 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -9,6 +9,7 @@ @pytest.mark.parametrize( ["headers", "support", "expected", "extra_prefer"], [ + ({}, [], (ExecuteMode.ASYNC, None, {}), ""), # both modes supported (sync attempted upto max/specified wait time, 
unless async requested explicitly) ({}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.SYNC, 10, {}), ""), # only supported async (enforced) - original behaviour @@ -19,6 +20,9 @@ for (_headers, _support, _expected), _extra in itertools.product( [ + # no mode + ({"Prefer": "respond-async, wait=4"}, [], + (ExecuteMode.ASYNC, None, {})), # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) ({"Prefer": ""}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.SYNC, 10, {})), @@ -39,7 +43,11 @@ (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), ({"Prefer": "wait=4"}, [ExecuteControlOption.ASYNC], (ExecuteMode.ASYNC, None, {})), - + # only supported sync (enforced) + ({"Prefer": "wait=4"}, [ExecuteControlOption.SYNC], + (ExecuteMode.SYNC, 4, {"Preference-Applied": "wait=4"})), + ({"Prefer": "respond-async"}, [ExecuteControlOption.SYNC], + (ExecuteMode.SYNC, 10, {})), # 10 is weaver default if not configured otherwise ], [ "", diff --git a/weaver/execute.py b/weaver/execute.py index 4f893593f..01c0b7581 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -198,7 +198,7 @@ def parse_prefer_header_execute_mode( # supported mode is enforced, only indicate if it matches preferences to honour them # otherwise, server is allowed to discard preference since it cannot be honoured mode = ExecuteMode.ASYNC if supported_modes[0] == ExecuteControlOption.ASYNC else ExecuteMode.SYNC - wait = None if mode == ExecuteMode.ASYNC else wait_max + wait = None if mode == ExecuteMode.ASYNC else wait if auto == mode: if auto == ExecuteMode.ASYNC: applied_preferences.append("respond-async") From a4544a35b41ad121fb20ff90ffc3f098c2084add Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 14:03:05 -0400 Subject: [PATCH 74/75] fix output result locations for S3 storage --- tests/functional/test_wps_package.py | 18 +++++++++++------- 
weaver/processes/wps_package.py | 8 ++++++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index bfba6106f..cdbbcd10d 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -5611,32 +5611,36 @@ def test_execute_application_package_process_with_bucket_results(self): # check that outputs are S3 bucket references output_values = {out["id"]: get_any_value(out) for out in outputs["outputs"]} output_bucket = self.settings["weaver.wps_output_s3_bucket"] + output_files = [("output_from_s3", input_file_s3), ("output_from_http", input_file_http)] wps_uuid = str(self.job_store.fetch_by_id(job_id).wps_id) - for out_key, out_file in [("output_from_s3", input_file_s3), ("output_from_http", input_file_http)]: - output_ref = f"{output_bucket}/{wps_uuid}/{out_file}" + for out_id, out_file in output_files: + output_ref = f"{output_bucket}/{wps_uuid}/{out_id}/{out_file}" output_ref_abbrev = f"s3://{output_ref}" output_ref_full = f"https://s3.{MOCK_AWS_REGION}.amazonaws.com/{output_ref}" output_ref_any = [output_ref_abbrev, output_ref_full] # allow any variant weaver can parse # validation on outputs path - assert output_values[out_key] in output_ref_any + assert output_values[out_id] in output_ref_any # validation on results path - assert results[out_key]["href"] in output_ref_any + assert results[out_id]["href"] in output_ref_any # check that outputs are indeed stored in S3 buckets mocked_s3 = boto3.client("s3", region_name=MOCK_AWS_REGION) resp_json = mocked_s3.list_objects_v2(Bucket=output_bucket) bucket_file_keys = [obj["Key"] for obj in resp_json["Contents"]] - for out_file in [input_file_s3, input_file_http]: - out_key = f"{job_id}/{out_file}" + for out_id, out_file in output_files: + out_key = f"{job_id}/{out_id}/{out_file}" assert out_key in bucket_file_keys # check that outputs are NOT copied locally, but that XML status does exist # counter 
validate path with file always present to ensure outputs are not 'missing' just because of wrong dir wps_outdir = self.settings["weaver.wps_output_dir"] - for out_file in [input_file_s3, input_file_http]: + for out_id, out_file in output_files: assert not os.path.exists(os.path.join(wps_outdir, out_file)) assert not os.path.exists(os.path.join(wps_outdir, job_id, out_file)) assert not os.path.exists(os.path.join(wps_outdir, wps_uuid, out_file)) + assert not os.path.exists(os.path.join(wps_outdir, out_id, out_file)) + assert not os.path.exists(os.path.join(wps_outdir, job_id, out_id, out_file)) + assert not os.path.exists(os.path.join(wps_outdir, wps_uuid, out_id, out_file)) assert os.path.isfile(os.path.join(wps_outdir, f"{job_id}.xml")) @pytest.mark.skip(reason="OAS execute parse/validate values not implemented") diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index 8b15fe818..e3ee684ca 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -2732,9 +2732,13 @@ def make_location_storage(self, storage_type, location_type, output_id): # pylint: disable=attribute-defined-outside-init # references to nested storage dynamically created if storage_type == STORE_TYPE.S3: # when using S3 storage, the 'prefix' is directly employed with the file name - # however, we want results to be nested under their output ID + # results should be nested under their output ID to allow arrays and alternate types # therefore, preemptively adjust the prefix to do as such - storage.prefix = os.path.join(output_prefix, output_id) + # however, do not do it for the case of directories, since the output ID is already the directory itself + if location_type == PACKAGE_DIRECTORY_TYPE: + storage.prefix = output_prefix + else: + storage.prefix = os.path.join(output_prefix, output_id) else: # when using other storage than S3, the 'target' is automatically built using a join # of 'prefix' and the output ID stored in the parent result object 
containing this storage From ae469c70f353ee977a51fedd4d2408a80b3948ac Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 14:10:37 -0400 Subject: [PATCH 75/75] remove fixme comment from review --- weaver/wps_restapi/jobs/utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index b646c83a0..7a42e0e29 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -570,11 +570,6 @@ def get_job_results_response( raise_job_bad_status(job, container) settings = get_settings(container) - # FIXME: apply converters (https://github.com/crim-ca/weaver/pull/548) - # - test_execute_single_output_response_document_alt_format_json - # - test_execute_single_output_response_document_alt_format_yaml - # - test_execute_single_output_multipart_accept_alt_format - results, _ = get_results( job, container, value_key="value",